From 71e510540e817550f6e28ca0acc8136313160095 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Fri, 6 Mar 2015 12:27:11 +0000 Subject: Godeps: bump github.com/ethereum/ethash This fixes the build. --- Godeps/Godeps.json | 4 +- .../src/github.com/ethereum/ethash/.gitignore | 7 +- .../src/github.com/ethereum/ethash/.travis.yml | 6 + .../src/github.com/ethereum/ethash/CMakeLists.txt | 11 +- .../src/github.com/ethereum/ethash/Makefile | 3 + .../src/github.com/ethereum/ethash/README.md | 7 + .../src/github.com/ethereum/ethash/Vagrantfile | 7 + .../ethereum/ethash/benchmark/CMakeLists.txt | 53 - .../ethereum/ethash/benchmark/benchmark.cpp | 260 - .../ethash/cmake/modules/CMakeParseArguments.cmake | 161 + .../ethereum/ethash/cmake/modules/FindOpenCL.cmake | 217 +- .../modules/FindPackageHandleStandardArgs.cmake | 382 + .../ethash/cmake/modules/FindPackageMessage.cmake | 57 + .../src/github.com/ethereum/ethash/ethash.go | 215 +- .../src/github.com/ethereum/ethash/js/LICENSE | 22 + .../src/github.com/ethereum/ethash/js/ethash.js | 190 + .../src/github.com/ethereum/ethash/js/keccak.js | 404 + .../github.com/ethereum/ethash/js/makekeccak.js | 201 + .../src/github.com/ethereum/ethash/js/test.js | 53 + .../src/github.com/ethereum/ethash/js/util.js | 100 + .../ethereum/ethash/libethash-cl/CMakeLists.txt | 12 - .../github.com/ethereum/ethash/libethash-cl/cl.hpp | 12452 ------------------- .../ethash/libethash-cl/ethash_cl_miner.cpp | 754 -- .../ethereum/ethash/libethash-cl/ethash_cl_miner.h | 43 - .../ethereum/ethash/libethash-cuda/CMakeLists.txt | 15 - .../ethereum/ethash/libethash-cuda/cuPrintf.cu | 879 -- .../ethereum/ethash/libethash-cuda/cuPrintf.cuh | 162 - .../ethereum/ethash/libethash-cuda/libethash.cu | 27 - .../ethereum/ethash/libethash/CMakeLists.txt | 33 - .../ethereum/ethash/libethash/compiler.h | 33 - .../ethereum/ethash/libethash/data_sizes.h | 248 - .../github.com/ethereum/ethash/libethash/endian.h | 74 - .../github.com/ethereum/ethash/libethash/ethash.h | 88 - .../src/github.com/ethereum/ethash/libethash/fnv.h | 38 - .../ethereum/ethash/libethash/internal.c | 297 - .../ethereum/ethash/libethash/internal.h | 48 - .../github.com/ethereum/ethash/libethash/sha3.c | 151 - .../github.com/ethereum/ethash/libethash/sha3.h | 27 - .../ethereum/ethash/libethash/sha3_cryptopp.cpp | 34 - .../ethereum/ethash/libethash/sha3_cryptopp.h | 15 - .../github.com/ethereum/ethash/libethash/util.c | 41 - .../github.com/ethereum/ethash/libethash/util.h | 47 - .../ethereum/ethash/node-ethash/binding.gyp | 29 - .../ethereum/ethash/node-ethash/ethash.cc | 587 - .../ethereum/ethash/node-ethash/package.json | 13 - .../ethereum/ethash/node-ethash/readme.md | 12 - .../github.com/ethereum/ethash/pyethash/.gitignore | 2 + .../ethereum/ethash/pyethash/__init__.py | 3 + .../src/github.com/ethereum/ethash/setup.py | 21 + .../ethereum/ethash/src/benchmark/CMakeLists.txt | 53 + .../ethereum/ethash/src/benchmark/benchmark.cpp | 260 + .../ethash/src/libethash-cl/CMakeLists.txt | 15 + .../ethereum/ethash/src/libethash-cl/bin2h.cmake | 87 + .../ethereum/ethash/src/libethash-cl/cl.hpp | 12452 +++++++++++++++++++ .../ethash/src/libethash-cl/ethash_cl_miner.cpp | 289 + .../ethash/src/libethash-cl/ethash_cl_miner.h | 43 + .../src/libethash-cl/ethash_cl_miner_kernel.cl | 461 + .../ethereum/ethash/src/libethash/CMakeLists.txt | 39 + .../ethereum/ethash/src/libethash/compiler.h | 33 + .../ethereum/ethash/src/libethash/data_sizes.h | 779 ++ .../ethereum/ethash/src/libethash/endian.h | 74 + .../ethereum/ethash/src/libethash/ethash.h | 95 + .../github.com/ethereum/ethash/src/libethash/fnv.h | 38 + .../ethereum/ethash/src/libethash/internal.c | 298 + .../ethereum/ethash/src/libethash/internal.h | 48 + .../ethereum/ethash/src/libethash/sha3.c | 151 + .../ethereum/ethash/src/libethash/sha3.h | 27 + .../ethash/src/libethash/sha3_cryptopp.cpp | 34 + .../ethereum/ethash/src/libethash/sha3_cryptopp.h | 15 + .../ethereum/ethash/src/libethash/util.c | 41 + .../ethereum/ethash/src/libethash/util.h | 47 + .../github.com/ethereum/ethash/src/python/core.c | 64 + .../github.com/ethereum/ethash/test/CMakeLists.txt | 30 - .../ethereum/ethash/test/c/CMakeLists.txt | 30 + .../src/github.com/ethereum/ethash/test/c/test.cpp | 214 + .../src/github.com/ethereum/ethash/test/c/test.sh | 19 + .../ethereum/ethash/test/go/ethash_test.go | 54 - .../ethereum/ethash/test/python/.gitignore | 1 + .../ethereum/ethash/test/python/requirements.txt | 2 + .../github.com/ethereum/ethash/test/python/test.sh | 19 + .../ethereum/ethash/test/python/test_pyethash.py | 45 + .../src/github.com/ethereum/ethash/test/test.cpp | 233 - .../src/github.com/ethereum/ethash/test/test.sh | 26 + 83 files changed, 17699 insertions(+), 16962 deletions(-) create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/.travis.yml create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/Makefile create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/README.md create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/Vagrantfile delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/CMakeParseArguments.cmake create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindPackageHandleStandardArgs.cmake create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindPackageMessage.cmake create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/js/LICENSE create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/js/ethash.js create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/js/keccak.js create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/js/makekeccak.js create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/js/test.js create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/js/util.js delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/ethash.cc delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/pyethash/.gitignore create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/pyethash/__init__.py create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/setup.py create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/benchmark/CMakeLists.txt create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/benchmark/benchmark.cpp create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/CMakeLists.txt create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/bin2h.cmake create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/cl.hpp create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/ethash_cl_miner.cpp create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/ethash_cl_miner.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/ethash_cl_miner_kernel.cl create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/CMakeLists.txt create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/compiler.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/data_sizes.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/endian.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/ethash.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/fnv.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/internal.c create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/internal.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3.c create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3_cryptopp.cpp create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3_cryptopp.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/util.c create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/util.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/src/python/core.c delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/test/c/CMakeLists.txt create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/test/c/test.cpp create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/test/c/test.sh delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/test/python/.gitignore create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/test/python/requirements.txt create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/test/python/test.sh create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/test/python/test_pyethash.py delete mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/test/test.sh (limited to 'Godeps') diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index a1025c85d..3c1769529 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -17,8 +17,8 @@ }, { "ImportPath": "github.com/ethereum/ethash", - "Comment": "v17-23-g2561e13", - "Rev": "2561e1322a7e8e3d4a2cc903c44b1e96340bcb27" + "Comment": "v17-63-gbca024b", + "Rev": "bca024b0b30d83ec6798a5d4fa8c5fc6f937009a" }, { "ImportPath": "github.com/ethereum/serpent-go", diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore b/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore index 6bb36ed15..3162bd828 100644 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore @@ -2,4 +2,9 @@ .DS_Store */**/*un~ .vagrant/ -cpp-build/ +*.pyc +build/ +pyethash.egg-info/ +*.so +*~ +*.swp diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/.travis.yml b/Godeps/_workspace/src/github.com/ethereum/ethash/.travis.yml new file mode 100644 index 000000000..4b929eb69 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/.travis.yml @@ -0,0 +1,6 @@ +before_install: + - sudo apt-get update -qq + - sudo apt-get install -qq wget cmake gcc bash libboost-test-dev nodejs python-pip python-dev + - sudo pip install virtualenv -q + +script: "./test/test.sh" diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt index ac189457f..c93395bf8 100644 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.2) +cmake_minimum_required(VERSION 2.8.7) project(ethash) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/") @@ -8,7 +8,8 @@ if (WIN32 AND WANT_CRYPTOPP) add_subdirectory(cryptopp) endif() -add_subdirectory(libethash) -add_subdirectory(libethash-cl EXCLUDE_FROM_ALL) -add_subdirectory(benchmark EXCLUDE_FROM_ALL) -add_subdirectory(test EXCLUDE_FROM_ALL) +add_subdirectory(src/libethash) +# bin2h.cmake doesn't work +#add_subdirectory(src/libethash-cl EXCLUDE_FROM_ALL) +add_subdirectory(src/benchmark EXCLUDE_FROM_ALL) +add_subdirectory(test/c EXCLUDE_FROM_ALL) diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/Makefile b/Godeps/_workspace/src/github.com/ethereum/ethash/Makefile new file mode 100644 index 000000000..e2630581e --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/Makefile @@ -0,0 +1,3 @@ +.PHONY: clean +clean: + rm -rf *.so pyethash.egg-info/ build/ test/python/python-virtual-env/ test/c/build/ pyethash/*.{so,pyc} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/README.md b/Godeps/_workspace/src/github.com/ethereum/ethash/README.md new file mode 100644 index 000000000..8b9e015cf --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/README.md @@ -0,0 +1,7 @@ +[![Build Status](https://travis-ci.org/ethereum/ethash.svg?branch=master)](https://travis-ci.org/ethereum/ethash) + + +# Ethash + +For details on this project, please see the Ethereum wiki: +https://github.com/ethereum/wiki/wiki/Ethash diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/Vagrantfile b/Godeps/_workspace/src/github.com/ethereum/ethash/Vagrantfile new file mode 100644 index 000000000..03891653f --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/Vagrantfile @@ -0,0 +1,7 @@ +# -*- mode: ruby -*- +# vi: set ft=ruby : + +Vagrant.configure(2) do |config| + config.vm.box = "Ubuntu 12.04" + config.vm.box_url = "https://cloud-images.ubuntu.com/vagrant/precise/current/precise-server-cloudimg-amd64-vagrant-disk1.box" +end diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt deleted file mode 100644 index e6ba85790..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt +++ /dev/null @@ -1,53 +0,0 @@ -include_directories(..) - -set(CMAKE_BUILD_TYPE Release) - -if (MSVC) - add_definitions("/openmp") -endif() - -if (NOT MPI_FOUND) - find_package(MPI) -endif() - -if (NOT CRYPTOPP_FOUND) - find_package(CryptoPP 5.6.2) -endif() - -if (CRYPTOPP_FOUND) - add_definitions(-DWITH_CRYPTOPP) -endif() - -if (NOT OpenCL_FOUND) - find_package(OpenCL) -endif() -if (OpenCL_FOUND) - add_definitions(-DWITH_OPENCL) - include_directories(${OpenCL_INCLUDE_DIRS}) - list(APPEND FILES ethash_cl_miner.cpp ethash_cl_miner.h) -endif() - -if (MPI_FOUND) - include_directories(${MPI_INCLUDE_PATH}) - add_executable (Benchmark_MPI_FULL benchmark.cpp) - target_link_libraries (Benchmark_MPI_FULL ${ETHHASH_LIBS} ${MPI_LIBRARIES}) - SET_TARGET_PROPERTIES(Benchmark_MPI_FULL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} ${MPI_COMPILE_FLAGS} -DFULL -DMPI") - - add_executable (Benchmark_MPI_LIGHT benchmark.cpp) - target_link_libraries (Benchmark_MPI_LIGHT ${ETHHASH_LIBS} ${MPI_LIBRARIES}) - SET_TARGET_PROPERTIES(Benchmark_MPI_LIGHT PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} ${MPI_COMPILE_FLAGS} -DMPI") -endif() - -add_executable (Benchmark_FULL benchmark.cpp) -target_link_libraries (Benchmark_FULL ${ETHHASH_LIBS}) -SET_TARGET_PROPERTIES(Benchmark_FULL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -DFULL") - -add_executable (Benchmark_LIGHT benchmark.cpp) -target_link_libraries (Benchmark_LIGHT ${ETHHASH_LIBS}) - -if (OpenCL_FOUND) - add_executable (Benchmark_CL benchmark.cpp) - target_link_libraries (Benchmark_CL ${ETHHASH_LIBS} ethash-cl) - SET_TARGET_PROPERTIES(Benchmark_CL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -DOPENCL") -endif() - diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp deleted file mode 100644 index 4c8f700c5..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp +++ /dev/null @@ -1,260 +0,0 @@ -/* - This file is part of cpp-ethereum. - - cpp-ethereum is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - cpp-ethereum is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with cpp-ethereum. If not, see . -*/ -/** @file benchmark.cpp - * @author Tim Hughes - * @date 2015 - */ - -#include -#include -#include -#include -#include -#ifdef OPENCL -#include -#endif -#include -#include - -#ifdef WITH_CRYPTOPP -#include -#include - -#else -#include "libethash/sha3.h" -#endif // WITH_CRYPTOPP - -#undef min -#undef max - -#if defined(OPENCL) -const unsigned trials = 1024*1024*32; -#elif defined(FULL) -const unsigned trials = 1024*1024/8; -#else -const unsigned trials = 1024*1024/1024; -#endif -uint8_t g_hashes[1024*32]; - -static char nibbleToChar(unsigned nibble) -{ - return (char) ((nibble >= 10 ? 'a'-10 : '0') + nibble); -} - -static uint8_t charToNibble(char chr) -{ - if (chr >= '0' && chr <= '9') - { - return (uint8_t) (chr - '0'); - } - if (chr >= 'a' && chr <= 'z') - { - return (uint8_t) (chr - 'a' + 10); - } - if (chr >= 'A' && chr <= 'Z') - { - return (uint8_t) (chr - 'A' + 10); - } - return 0; -} - -static std::vector hexStringToBytes(char const* str) -{ - std::vector bytes(strlen(str) >> 1); - for (unsigned i = 0; i != bytes.size(); ++i) - { - bytes[i] = charToNibble(str[i*2 | 0]) << 4; - bytes[i] |= charToNibble(str[i*2 | 1]); - } - return bytes; -} - -static std::string bytesToHexString(uint8_t const* bytes, unsigned size) -{ - std::string str; - for (unsigned i = 0; i != size; ++i) - { - str += nibbleToChar(bytes[i] >> 4); - str += nibbleToChar(bytes[i] & 0xf); - } - return str; -} - -extern "C" int main(void) -{ - // params for ethash - ethash_params params; - ethash_params_init(¶ms, 0); - //params.full_size = 262147 * 4096; // 1GBish; - //params.full_size = 32771 * 4096; // 128MBish; - //params.full_size = 8209 * 4096; // 8MBish; - //params.cache_size = 8209*4096; - //params.cache_size = 2053*4096; - uint8_t seed[32], previous_hash[32]; - - memcpy(seed, hexStringToBytes("9410b944535a83d9adf6bbdcc80e051f30676173c16ca0d32d6f1263fc246466").data(), 32); - memcpy(previous_hash, hexStringToBytes("c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470").data(), 32); - - // allocate page aligned buffer for dataset -#ifdef FULL - void* full_mem_buf = malloc(params.full_size + 4095); - void* full_mem = (void*)((uintptr_t(full_mem_buf) + 4095) & ~4095); -#endif - void* cache_mem_buf = malloc(params.cache_size + 63); - void* cache_mem = (void*)((uintptr_t(cache_mem_buf) + 63) & ~63); - - ethash_cache cache; - cache.mem = cache_mem; - - // compute cache or full data - { - clock_t startTime = clock(); - ethash_mkcache(&cache, ¶ms, seed); - clock_t time = clock() - startTime; - - uint8_t cache_hash[32]; - SHA3_256(cache_hash, (uint8_t const*)cache_mem, params.cache_size); - debugf("ethash_mkcache: %ums, sha3: %s\n", (unsigned)((time*1000)/CLOCKS_PER_SEC), bytesToHexString(cache_hash,sizeof(cache_hash)).data()); - - // print a couple of test hashes - { - const clock_t startTime = clock(); - ethash_return_value hash; - ethash_light(&hash, &cache, ¶ms, previous_hash, 0); - const clock_t time = clock() - startTime; - debugf("ethash_light test: %ums, %s\n", (unsigned)((time*1000)/CLOCKS_PER_SEC), bytesToHexString(hash.result, 32).data()); - } - - #ifdef FULL - startTime = clock(); - ethash_compute_full_data(full_mem, ¶ms, &cache); - time = clock() - startTime; - debugf("ethash_compute_full_data: %ums\n", (unsigned)((time*1000)/CLOCKS_PER_SEC)); - #endif // FULL - } - -#ifdef OPENCL - ethash_cl_miner miner; - { - const clock_t startTime = clock(); - if (!miner.init(params, seed)) - exit(-1); - const clock_t time = clock() - startTime; - debugf("ethash_cl_miner init: %ums\n", (unsigned)((time*1000)/CLOCKS_PER_SEC)); - } -#endif - - -#ifdef FULL - { - const clock_t startTime = clock(); - ethash_return_value hash; - ethash_full(&hash, full_mem, ¶ms, previous_hash, 0); - const clock_t time = clock() - startTime; - debugf("ethash_full test: %uns, %s\n", (unsigned)((time*1000000)/CLOCKS_PER_SEC), bytesToHexString(hash.result, 32).data()); - } -#endif - -#ifdef OPENCL - // validate 1024 hashes against CPU - miner.hash(g_hashes, previous_hash, 0, 1024); - for (unsigned i = 0; i != 1024; ++i) - { - ethash_return_value hash; - ethash_light(&hash, &cache, ¶ms, previous_hash, i); - if (memcmp(hash.result, g_hashes + 32*i, 32) != 0) - { - debugf("nonce %u failed: %s %s\n", i, bytesToHexString(g_hashes + 32*i, 32).c_str(), bytesToHexString(hash.result, 32).c_str()); - static unsigned c = 0; - if (++c == 16) - { - exit(-1); - } - } - } -#endif - - - clock_t startTime = clock(); - unsigned hash_count = trials; - - #ifdef OPENCL - { - struct search_hook : ethash_cl_miner::search_hook - { - unsigned hash_count; - std::vector nonce_vec; - - virtual bool found(uint64_t const* nonces, uint32_t count) - { - nonce_vec.assign(nonces, nonces + count); - return false; - } - - virtual bool searched(uint64_t start_nonce, uint32_t count) - { - // do nothing - hash_count += count; - return hash_count >= trials; - } - }; - search_hook hook; - hook.hash_count = 0; - - miner.search(previous_hash, 0x000000ffffffffff, hook); - - for (unsigned i = 0; i != hook.nonce_vec.size(); ++i) - { - uint64_t nonce = hook.nonce_vec[i]; - ethash_return_value hash; - ethash_light(&hash, &cache, ¶ms, previous_hash, nonce); - debugf("found: %.8x%.8x -> %s\n", unsigned(nonce>>32), unsigned(nonce), bytesToHexString(hash.result, 32).c_str()); - } - - hash_count = hook.hash_count; - } - #else - { - //#pragma omp parallel for - for (int nonce = 0; nonce < trials; ++nonce) - { - ethash_return_value hash; - #ifdef FULL - ethash_full(&hash, full_mem, ¶ms, previous_hash, nonce); - #else - ethash_light(&hash, &cache, ¶ms, previous_hash, nonce); - #endif // FULL - } - } - #endif - - clock_t time = std::max((clock_t)1u, clock() - startTime); - - unsigned read_size = ACCESSES * MIX_BYTES; - debugf( - "hashrate: %8u, bw: %6u MB/s\n", - (unsigned)(((uint64_t)hash_count*CLOCKS_PER_SEC)/time), - (unsigned)((((uint64_t)hash_count*read_size*CLOCKS_PER_SEC)/time) / (1024*1024)) - ); - - free(cache_mem_buf); -#ifdef FULL - free(full_mem_buf); -#endif - - return 0; -} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/CMakeParseArguments.cmake b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/CMakeParseArguments.cmake new file mode 100644 index 000000000..8553f38f5 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/CMakeParseArguments.cmake @@ -0,0 +1,161 @@ +#.rst: +# CMakeParseArguments +# ------------------- +# +# +# +# CMAKE_PARSE_ARGUMENTS( +# args...) +# +# CMAKE_PARSE_ARGUMENTS() is intended to be used in macros or functions +# for parsing the arguments given to that macro or function. It +# processes the arguments and defines a set of variables which hold the +# values of the respective options. +# +# The argument contains all options for the respective macro, +# i.e. keywords which can be used when calling the macro without any +# value following, like e.g. the OPTIONAL keyword of the install() +# command. +# +# The argument contains all keywords for this macro +# which are followed by one value, like e.g. DESTINATION keyword of the +# install() command. +# +# The argument contains all keywords for this +# macro which can be followed by more than one value, like e.g. the +# TARGETS or FILES keywords of the install() command. +# +# When done, CMAKE_PARSE_ARGUMENTS() will have defined for each of the +# keywords listed in , and +# a variable composed of the given +# followed by "_" and the name of the respective keyword. These +# variables will then hold the respective value from the argument list. +# For the keywords this will be TRUE or FALSE. +# +# All remaining arguments are collected in a variable +# _UNPARSED_ARGUMENTS, this can be checked afterwards to see +# whether your macro was called with unrecognized parameters. +# +# As an example here a my_install() macro, which takes similar arguments +# as the real install() command: +# +# :: +# +# function(MY_INSTALL) +# set(options OPTIONAL FAST) +# set(oneValueArgs DESTINATION RENAME) +# set(multiValueArgs TARGETS CONFIGURATIONS) +# cmake_parse_arguments(MY_INSTALL "${options}" "${oneValueArgs}" +# "${multiValueArgs}" ${ARGN} ) +# ... +# +# +# +# Assume my_install() has been called like this: +# +# :: +# +# my_install(TARGETS foo bar DESTINATION bin OPTIONAL blub) +# +# +# +# After the cmake_parse_arguments() call the macro will have set the +# following variables: +# +# :: +# +# MY_INSTALL_OPTIONAL = TRUE +# MY_INSTALL_FAST = FALSE (this option was not used when calling my_install() +# MY_INSTALL_DESTINATION = "bin" +# MY_INSTALL_RENAME = "" (was not used) +# MY_INSTALL_TARGETS = "foo;bar" +# MY_INSTALL_CONFIGURATIONS = "" (was not used) +# MY_INSTALL_UNPARSED_ARGUMENTS = "blub" (no value expected after "OPTIONAL" +# +# +# +# You can then continue and process these variables. +# +# Keywords terminate lists of values, e.g. if directly after a +# one_value_keyword another recognized keyword follows, this is +# interpreted as the beginning of the new option. E.g. +# my_install(TARGETS foo DESTINATION OPTIONAL) would result in +# MY_INSTALL_DESTINATION set to "OPTIONAL", but MY_INSTALL_DESTINATION +# would be empty and MY_INSTALL_OPTIONAL would be set to TRUE therefor. + +#============================================================================= +# Copyright 2010 Alexander Neundorf +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of CMake, substitute the full +# License text for the above reference.) + + +if(__CMAKE_PARSE_ARGUMENTS_INCLUDED) + return() +endif() +set(__CMAKE_PARSE_ARGUMENTS_INCLUDED TRUE) + + +function(CMAKE_PARSE_ARGUMENTS prefix _optionNames _singleArgNames _multiArgNames) + # first set all result variables to empty/FALSE + foreach(arg_name ${_singleArgNames} ${_multiArgNames}) + set(${prefix}_${arg_name}) + endforeach() + + foreach(option ${_optionNames}) + set(${prefix}_${option} FALSE) + endforeach() + + set(${prefix}_UNPARSED_ARGUMENTS) + + set(insideValues FALSE) + set(currentArgName) + + # now iterate over all arguments and fill the result variables + foreach(currentArg ${ARGN}) + list(FIND _optionNames "${currentArg}" optionIndex) # ... then this marks the end of the arguments belonging to this keyword + list(FIND _singleArgNames "${currentArg}" singleArgIndex) # ... then this marks the end of the arguments belonging to this keyword + list(FIND _multiArgNames "${currentArg}" multiArgIndex) # ... then this marks the end of the arguments belonging to this keyword + + if(${optionIndex} EQUAL -1 AND ${singleArgIndex} EQUAL -1 AND ${multiArgIndex} EQUAL -1) + if(insideValues) + if("${insideValues}" STREQUAL "SINGLE") + set(${prefix}_${currentArgName} ${currentArg}) + set(insideValues FALSE) + elseif("${insideValues}" STREQUAL "MULTI") + list(APPEND ${prefix}_${currentArgName} ${currentArg}) + endif() + else() + list(APPEND ${prefix}_UNPARSED_ARGUMENTS ${currentArg}) + endif() + else() + if(NOT ${optionIndex} EQUAL -1) + set(${prefix}_${currentArg} TRUE) + set(insideValues FALSE) + elseif(NOT ${singleArgIndex} EQUAL -1) + set(currentArgName ${currentArg}) + set(${prefix}_${currentArgName}) + set(insideValues "SINGLE") + elseif(NOT ${multiArgIndex} EQUAL -1) + set(currentArgName ${currentArg}) + set(${prefix}_${currentArgName}) + set(insideValues "MULTI") + endif() + endif() + + endforeach() + + # propagate the result variables to the caller: + foreach(arg_name ${_singleArgNames} ${_multiArgNames} ${_optionNames}) + set(${prefix}_${arg_name} ${${prefix}_${arg_name}} PARENT_SCOPE) + endforeach() + set(${prefix}_UNPARSED_ARGUMENTS ${${prefix}_UNPARSED_ARGUMENTS} PARENT_SCOPE) + +endfunction() diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake index cc567c95e..4d3ed842c 100644 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake @@ -1,91 +1,136 @@ +#.rst: +# FindOpenCL +# ---------- # -# This file taken from FindOpenCL project @ http://gitorious.com/findopencl +# Try to find OpenCL # -# - Try to find OpenCL -# This module tries to find an OpenCL implementation on your system. It supports -# AMD / ATI, Apple and NVIDIA implementations, but shoudl work, too. +# Once done this will define:: # -# Once done this will define -# OPENCL_FOUND - system has OpenCL -# OPENCL_INCLUDE_DIRS - the OpenCL include directory -# OPENCL_LIBRARIES - link these to use OpenCL +# OpenCL_FOUND - True if OpenCL was found +# OpenCL_INCLUDE_DIRS - include directories for OpenCL +# OpenCL_LIBRARIES - link against this library to use OpenCL +# OpenCL_VERSION_STRING - Highest supported OpenCL version (eg. 1.2) +# OpenCL_VERSION_MAJOR - The major version of the OpenCL implementation +# OpenCL_VERSION_MINOR - The minor version of the OpenCL implementation +# +# The module will also define two cache variables:: +# +# OpenCL_INCLUDE_DIR - the OpenCL include directory +# OpenCL_LIBRARY - the path to the OpenCL library # -# WIN32 should work, but is untested - -FIND_PACKAGE( PackageHandleStandardArgs ) - -SET (OPENCL_VERSION_STRING "0.1.0") -SET (OPENCL_VERSION_MAJOR 0) -SET (OPENCL_VERSION_MINOR 1) -SET (OPENCL_VERSION_PATCH 0) - -IF (APPLE) - - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX") - FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX") - -ELSE (APPLE) - - IF (WIN32) - - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h) - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp) - - # The AMD SDK currently installs both x86 and x86_64 libraries - # This is only a hack to find out architecture - IF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) - SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86_64") - SET(OPENCL_LIB_DIR "$ENV{ATIINTERNALSTREAMSDKROOT}/lib/x86_64") - ELSE (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64") - SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86") - SET(OPENCL_LIB_DIR "$ENV{ATIINTERNALSTREAMSDKROOT}/lib/x86") - ENDIF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) - - # find out if the user asked for a 64-bit build, and use the corresponding - # 64 or 32 bit NVIDIA library paths to the search: - STRING(REGEX MATCH "Win64" ISWIN64 ${CMAKE_GENERATOR}) - IF("${ISWIN64}" STREQUAL "Win64") - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR} $ENV{CUDA_LIB_PATH} $ENV{CUDA_PATH}/lib/x64) - ELSE("${ISWIN64}" STREQUAL "Win64") - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR} $ENV{CUDA_LIB_PATH} $ENV{CUDA_PATH}/lib/Win32) - ENDIF("${ISWIN64}" STREQUAL "Win64") - - GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) - - # On Win32 search relative to the library - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS "${_OPENCL_INC_CAND}" $ENV{CUDA_INC_PATH} $ENV{CUDA_PATH}/include) - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS "${_OPENCL_INC_CAND}" $ENV{CUDA_INC_PATH} $ENV{CUDA_PATH}/include) - - ELSE (WIN32) - - # Unix style platforms - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL - ENV LD_LIBRARY_PATH - ) - - GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH) - GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) - - # The AMD SDK currently does not place its headers - # in /usr/include, therefore also search relative - # to the library - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include") - - ENDIF (WIN32) - -ENDIF (APPLE) - -FIND_PACKAGE_HANDLE_STANDARD_ARGS( OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS ) - -IF( _OPENCL_CPP_INCLUDE_DIRS ) - SET( OPENCL_HAS_CPP_BINDINGS TRUE ) - LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} ) - # This is often the same, so clean up - LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS ) -ENDIF( _OPENCL_CPP_INCLUDE_DIRS ) -MARK_AS_ADVANCED( - OPENCL_INCLUDE_DIRS -) \ No newline at end of file +#============================================================================= +# Copyright 2014 Matthaeus G. Chajdas +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of CMake, substitute the full +# License text for the above reference.) + +function(_FIND_OPENCL_VERSION) + include(CheckSymbolExists) + include(CMakePushCheckState) + set(CMAKE_REQUIRED_QUIET ${OpenCL_FIND_QUIETLY}) + + CMAKE_PUSH_CHECK_STATE() + foreach(VERSION "2_0" "1_2" "1_1" "1_0") + set(CMAKE_REQUIRED_INCLUDES "${OpenCL_INCLUDE_DIR}") + + if(APPLE) + CHECK_SYMBOL_EXISTS( + CL_VERSION_${VERSION} + "${OpenCL_INCLUDE_DIR}/OpenCL/cl.h" + OPENCL_VERSION_${VERSION}) + else() + CHECK_SYMBOL_EXISTS( + CL_VERSION_${VERSION} + "${OpenCL_INCLUDE_DIR}/CL/cl.h" + OPENCL_VERSION_${VERSION}) + endif() + + if(OPENCL_VERSION_${VERSION}) + string(REPLACE "_" "." VERSION "${VERSION}") + set(OpenCL_VERSION_STRING ${VERSION} PARENT_SCOPE) + string(REGEX MATCHALL "[0-9]+" version_components "${VERSION}") + list(GET version_components 0 major_version) + list(GET version_components 1 minor_version) + set(OpenCL_VERSION_MAJOR ${major_version} PARENT_SCOPE) + set(OpenCL_VERSION_MINOR ${minor_version} PARENT_SCOPE) + break() + endif() + endforeach() + CMAKE_POP_CHECK_STATE() +endfunction() + +find_path(OpenCL_INCLUDE_DIR + NAMES + CL/cl.h OpenCL/cl.h + PATHS + ENV "PROGRAMFILES(X86)" + ENV AMDAPPSDKROOT + ENV INTELOCLSDKROOT + ENV NVSDKCOMPUTE_ROOT + ENV CUDA_PATH + ENV ATISTREAMSDKROOT + PATH_SUFFIXES + include + OpenCL/common/inc + "AMD APP/include") + +_FIND_OPENCL_VERSION() + +if(WIN32) + if(CMAKE_SIZEOF_VOID_P EQUAL 4) + find_library(OpenCL_LIBRARY + NAMES OpenCL + PATHS + ENV "PROGRAMFILES(X86)" + ENV AMDAPPSDKROOT + ENV INTELOCLSDKROOT + ENV CUDA_PATH + ENV NVSDKCOMPUTE_ROOT + ENV ATISTREAMSDKROOT + PATH_SUFFIXES + "AMD APP/lib/x86" + lib/x86 + lib/Win32 + OpenCL/common/lib/Win32) + elseif(CMAKE_SIZEOF_VOID_P EQUAL 8) + find_library(OpenCL_LIBRARY + NAMES OpenCL + PATHS + ENV "PROGRAMFILES(X86)" + ENV AMDAPPSDKROOT + ENV INTELOCLSDKROOT + ENV CUDA_PATH + ENV NVSDKCOMPUTE_ROOT + ENV ATISTREAMSDKROOT + PATH_SUFFIXES + "AMD APP/lib/x86_64" + lib/x86_64 + lib/x64 + OpenCL/common/lib/x64) + endif() +else() + find_library(OpenCL_LIBRARY + NAMES OpenCL) +endif() + +set(OpenCL_LIBRARIES ${OpenCL_LIBRARY}) +set(OpenCL_INCLUDE_DIRS ${OpenCL_INCLUDE_DIR}) + +include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake) +find_package_handle_standard_args( + OpenCL + FOUND_VAR OpenCL_FOUND + REQUIRED_VARS OpenCL_LIBRARY OpenCL_INCLUDE_DIR + VERSION_VAR OpenCL_VERSION_STRING) + +mark_as_advanced( + OpenCL_INCLUDE_DIR + OpenCL_LIBRARY) diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindPackageHandleStandardArgs.cmake b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindPackageHandleStandardArgs.cmake new file mode 100644 index 000000000..6bcf1e788 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindPackageHandleStandardArgs.cmake @@ -0,0 +1,382 @@ +#.rst: +# FindPackageHandleStandardArgs +# ----------------------------- +# +# +# +# FIND_PACKAGE_HANDLE_STANDARD_ARGS( ... ) +# +# This function is intended to be used in FindXXX.cmake modules files. +# It handles the REQUIRED, QUIET and version-related arguments to +# find_package(). It also sets the _FOUND variable. The +# package is considered found if all variables ... listed contain +# valid results, e.g. valid filepaths. +# +# There are two modes of this function. The first argument in both +# modes is the name of the Find-module where it is called (in original +# casing). +# +# The first simple mode looks like this: +# +# :: +# +# FIND_PACKAGE_HANDLE_STANDARD_ARGS( +# (DEFAULT_MSG|"Custom failure message") ... ) +# +# If the variables to are all valid, then +# _FOUND will be set to TRUE. If DEFAULT_MSG is given +# as second argument, then the function will generate itself useful +# success and error messages. You can also supply a custom error +# message for the failure case. This is not recommended. +# +# The second mode is more powerful and also supports version checking: +# +# :: +# +# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME +# [FOUND_VAR ] +# [REQUIRED_VARS ...] +# [VERSION_VAR ] +# [HANDLE_COMPONENTS] +# [CONFIG_MODE] +# [FAIL_MESSAGE "Custom failure message"] ) +# +# In this mode, the name of the result-variable can be set either to +# either _FOUND or _FOUND using the +# FOUND_VAR option. Other names for the result-variable are not +# allowed. So for a Find-module named FindFooBar.cmake, the two +# possible names are FooBar_FOUND and FOOBAR_FOUND. It is recommended +# to use the original case version. If the FOUND_VAR option is not +# used, the default is _FOUND. +# +# As in the simple mode, if through are all valid, +# _FOUND will be set to TRUE. After REQUIRED_VARS the +# variables which are required for this package are listed. Following +# VERSION_VAR the name of the variable can be specified which holds the +# version of the package which has been found. If this is done, this +# version will be checked against the (potentially) specified required +# version used in the find_package() call. The EXACT keyword is also +# handled. The default messages include information about the required +# version and the version which has been actually found, both if the +# version is ok or not. If the package supports components, use the +# HANDLE_COMPONENTS option to enable handling them. In this case, +# find_package_handle_standard_args() will report which components have +# been found and which are missing, and the _FOUND variable +# will be set to FALSE if any of the required components (i.e. not the +# ones listed after OPTIONAL_COMPONENTS) are missing. Use the option +# CONFIG_MODE if your FindXXX.cmake module is a wrapper for a +# find_package(... NO_MODULE) call. In this case VERSION_VAR will be +# set to _VERSION and the macro will automatically check whether +# the Config module was found. Via FAIL_MESSAGE a custom failure +# message can be specified, if this is not used, the default message +# will be displayed. +# +# Example for mode 1: +# +# :: +# +# find_package_handle_standard_args(LibXml2 DEFAULT_MSG +# LIBXML2_LIBRARY LIBXML2_INCLUDE_DIR) +# +# +# +# LibXml2 is considered to be found, if both LIBXML2_LIBRARY and +# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to +# TRUE. If it is not found and REQUIRED was used, it fails with +# FATAL_ERROR, independent whether QUIET was used or not. If it is +# found, success will be reported, including the content of . On +# repeated Cmake runs, the same message won't be printed again. +# +# Example for mode 2: +# +# :: +# +# find_package_handle_standard_args(LibXslt +# FOUND_VAR LibXslt_FOUND +# REQUIRED_VARS LibXslt_LIBRARIES LibXslt_INCLUDE_DIRS +# VERSION_VAR LibXslt_VERSION_STRING) +# +# In this case, LibXslt is considered to be found if the variable(s) +# listed after REQUIRED_VAR are all valid, i.e. LibXslt_LIBRARIES and +# LibXslt_INCLUDE_DIRS in this case. The result will then be stored in +# LibXslt_FOUND . Also the version of LibXslt will be checked by using +# the version contained in LibXslt_VERSION_STRING. Since no +# FAIL_MESSAGE is given, the default messages will be printed. +# +# Another example for mode 2: +# +# :: +# +# find_package(Automoc4 QUIET NO_MODULE HINTS /opt/automoc4) +# find_package_handle_standard_args(Automoc4 CONFIG_MODE) +# +# In this case, FindAutmoc4.cmake wraps a call to find_package(Automoc4 +# NO_MODULE) and adds an additional search directory for automoc4. Here +# the result will be stored in AUTOMOC4_FOUND. The following +# FIND_PACKAGE_HANDLE_STANDARD_ARGS() call produces a proper +# success/error message. + +#============================================================================= +# Copyright 2007-2009 Kitware, Inc. +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of CMake, substitute the full +# License text for the above reference.) + +include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/CMakeParseArguments.cmake) + +# internal helper macro +macro(_FPHSA_FAILURE_MESSAGE _msg) + if (${_NAME}_FIND_REQUIRED) + message(FATAL_ERROR "${_msg}") + else () + if (NOT ${_NAME}_FIND_QUIETLY) + message(STATUS "${_msg}") + endif () + endif () +endmacro() + + +# internal helper macro to generate the failure message when used in CONFIG_MODE: +macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE) + # _CONFIG is set, but FOUND is false, this means that some other of the REQUIRED_VARS was not found: + if(${_NAME}_CONFIG) + _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing: ${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})") + else() + # If _CONSIDERED_CONFIGS is set, the config-file has been found, but no suitable version. + # List them all in the error message: + if(${_NAME}_CONSIDERED_CONFIGS) + set(configsText "") + list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount) + math(EXPR configsCount "${configsCount} - 1") + foreach(currentConfigIndex RANGE ${configsCount}) + list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename) + list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version) + set(configsText "${configsText} ${filename} (version ${version})\n") + endforeach() + if (${_NAME}_NOT_FOUND_MESSAGE) + set(configsText "${configsText} Reason given by package: ${${_NAME}_NOT_FOUND_MESSAGE}\n") + endif() + _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:\n${configsText}") + + else() + # Simple case: No Config-file was found at all: + _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}") + endif() + endif() +endmacro() + + +function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG) + +# set up the arguments for CMAKE_PARSE_ARGUMENTS and check whether we are in +# new extended or in the "old" mode: + set(options CONFIG_MODE HANDLE_COMPONENTS) + set(oneValueArgs FAIL_MESSAGE VERSION_VAR FOUND_VAR) + set(multiValueArgs REQUIRED_VARS) + set(_KEYWORDS_FOR_EXTENDED_MODE ${options} ${oneValueArgs} ${multiValueArgs} ) + list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX) + + if(${INDEX} EQUAL -1) + set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG}) + set(FPHSA_REQUIRED_VARS ${ARGN}) + set(FPHSA_VERSION_VAR) + else() + + CMAKE_PARSE_ARGUMENTS(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}" ${_FIRST_ARG} ${ARGN}) + + if(FPHSA_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"") + endif() + + if(NOT FPHSA_FAIL_MESSAGE) + set(FPHSA_FAIL_MESSAGE "DEFAULT_MSG") + endif() + endif() + +# now that we collected all arguments, process them + + if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG") + set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}") + endif() + + # In config-mode, we rely on the variable _CONFIG, which is set by find_package() + # when it successfully found the config-file, including version checking: + if(FPHSA_CONFIG_MODE) + list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG) + list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS) + set(FPHSA_VERSION_VAR ${_NAME}_VERSION) + endif() + + if(NOT FPHSA_REQUIRED_VARS) + message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()") + endif() + + list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR) + + string(TOUPPER ${_NAME} _NAME_UPPER) + string(TOLOWER ${_NAME} _NAME_LOWER) + + if(FPHSA_FOUND_VAR) + if(FPHSA_FOUND_VAR MATCHES "^${_NAME}_FOUND$" OR FPHSA_FOUND_VAR MATCHES "^${_NAME_UPPER}_FOUND$") + set(_FOUND_VAR ${FPHSA_FOUND_VAR}) + else() + message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.") + endif() + else() + set(_FOUND_VAR ${_NAME_UPPER}_FOUND) + endif() + + # collect all variables which were not found, so they can be printed, so the + # user knows better what went wrong (#6375) + set(MISSING_VARS "") + set(DETAILS "") + # check if all passed variables are valid + unset(${_FOUND_VAR}) + foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS}) + if(NOT ${_CURRENT_VAR}) + set(${_FOUND_VAR} FALSE) + set(MISSING_VARS "${MISSING_VARS} ${_CURRENT_VAR}") + else() + set(DETAILS "${DETAILS}[${${_CURRENT_VAR}}]") + endif() + endforeach() + if(NOT "${${_FOUND_VAR}}" STREQUAL "FALSE") + set(${_FOUND_VAR} TRUE) + endif() + + # component handling + unset(FOUND_COMPONENTS_MSG) + unset(MISSING_COMPONENTS_MSG) + + if(FPHSA_HANDLE_COMPONENTS) + foreach(comp ${${_NAME}_FIND_COMPONENTS}) + if(${_NAME}_${comp}_FOUND) + + if(NOT DEFINED FOUND_COMPONENTS_MSG) + set(FOUND_COMPONENTS_MSG "found components: ") + endif() + set(FOUND_COMPONENTS_MSG "${FOUND_COMPONENTS_MSG} ${comp}") + + else() + + if(NOT DEFINED MISSING_COMPONENTS_MSG) + set(MISSING_COMPONENTS_MSG "missing components: ") + endif() + set(MISSING_COMPONENTS_MSG "${MISSING_COMPONENTS_MSG} ${comp}") + + if(${_NAME}_FIND_REQUIRED_${comp}) + set(${_FOUND_VAR} FALSE) + set(MISSING_VARS "${MISSING_VARS} ${comp}") + endif() + + endif() + endforeach() + set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}") + set(DETAILS "${DETAILS}[c${COMPONENT_MSG}]") + endif() + + # version handling: + set(VERSION_MSG "") + set(VERSION_OK TRUE) + set(VERSION ${${FPHSA_VERSION_VAR}}) + + # check with DEFINED here as the requested or found version may be "0" + if (DEFINED ${_NAME}_FIND_VERSION) + if(DEFINED ${FPHSA_VERSION_VAR}) + + if(${_NAME}_FIND_VERSION_EXACT) # exact version required + # count the dots in the version string + string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${VERSION}") + # add one dot because there is one dot more than there are components + string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS) + if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT) + # Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT + # is at most 4 here. Therefore a simple lookup table is used. + if (${_NAME}_FIND_VERSION_COUNT EQUAL 1) + set(_VERSION_REGEX "[^.]*") + elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2) + set(_VERSION_REGEX "[^.]*\\.[^.]*") + elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3) + set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*") + else () + set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*") + endif () + string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${VERSION}") + unset(_VERSION_REGEX) + if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD) + set(VERSION_MSG "Found unsuitable version \"${VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"") + set(VERSION_OK FALSE) + else () + set(VERSION_MSG "(found suitable exact version \"${VERSION}\")") + endif () + unset(_VERSION_HEAD) + else () + if (NOT "${${_NAME}_FIND_VERSION}" VERSION_EQUAL "${VERSION}") + set(VERSION_MSG "Found unsuitable version \"${VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"") + set(VERSION_OK FALSE) + else () + set(VERSION_MSG "(found suitable exact version \"${VERSION}\")") + endif () + endif () + unset(_VERSION_DOTS) + + else() # minimum version specified: + if ("${${_NAME}_FIND_VERSION}" VERSION_GREATER "${VERSION}") + set(VERSION_MSG "Found unsuitable version \"${VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"") + set(VERSION_OK FALSE) + else () + set(VERSION_MSG "(found suitable version \"${VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")") + endif () + endif() + + else() + + # if the package was not found, but a version was given, add that to the output: + if(${_NAME}_FIND_VERSION_EXACT) + set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")") + else() + set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")") + endif() + + endif() + else () + if(VERSION) + set(VERSION_MSG "(found version \"${VERSION}\")") + endif() + endif () + + if(VERSION_OK) + set(DETAILS "${DETAILS}[v${VERSION}(${${_NAME}_FIND_VERSION})]") + else() + set(${_FOUND_VAR} FALSE) + endif() + + + # print the result: + if (${_FOUND_VAR}) + FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}") + else () + + if(FPHSA_CONFIG_MODE) + _FPHSA_HANDLE_FAILURE_CONFIG_MODE() + else() + if(NOT VERSION_OK) + _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})") + else() + _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing: ${MISSING_VARS}) ${VERSION_MSG}") + endif() + endif() + + endif () + + set(${_FOUND_VAR} ${${_FOUND_VAR}} PARENT_SCOPE) + +endfunction() diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindPackageMessage.cmake b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindPackageMessage.cmake new file mode 100644 index 000000000..a0349d3db --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindPackageMessage.cmake @@ -0,0 +1,57 @@ +#.rst: +# FindPackageMessage +# ------------------ +# +# +# +# FIND_PACKAGE_MESSAGE( "message for user" "find result details") +# +# This macro is intended to be used in FindXXX.cmake modules files. It +# will print a message once for each unique find result. This is useful +# for telling the user where a package was found. The first argument +# specifies the name (XXX) of the package. The second argument +# specifies the message to display. The third argument lists details +# about the find result so that if they change the message will be +# displayed again. The macro also obeys the QUIET argument to the +# find_package command. +# +# Example: +# +# :: +# +# if(X11_FOUND) +# FIND_PACKAGE_MESSAGE(X11 "Found X11: ${X11_X11_LIB}" +# "[${X11_X11_LIB}][${X11_INCLUDE_DIR}]") +# else() +# ... +# endif() + +#============================================================================= +# Copyright 2008-2009 Kitware, Inc. +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of CMake, substitute the full +# License text for the above reference.) + +function(FIND_PACKAGE_MESSAGE pkg msg details) + # Avoid printing a message repeatedly for the same find result. + if(NOT ${pkg}_FIND_QUIETLY) + string(REPLACE "\n" "" details "${details}") + set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg}) + if(NOT "${details}" STREQUAL "${${DETAILS_VAR}}") + # The message has not yet been printed. + message(STATUS "${msg}") + + # Save the find details in the cache to avoid printing the same + # message again. + set("${DETAILS_VAR}" "${details}" + CACHE INTERNAL "Details about finding ${pkg}") + endif() + endif() +endfunction() diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go index 32d3f0264..e50a3a834 100644 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go @@ -2,32 +2,39 @@ package ethash /* #cgo CFLAGS: -std=gnu99 -Wall -#include "libethash/ethash.h" -#include "libethash/util.c" -#include "libethash/internal.c" -#include "libethash/sha3.c" +#include "src/libethash/util.c" +#include "src/libethash/internal.c" +#include "src/libethash/sha3.c" */ import "C" import ( "bytes" "encoding/binary" + "fmt" + "io/ioutil" "log" "math/big" "math/rand" + "os" + "path" "sync" "time" "unsafe" + "github.com/ethereum/go-ethereum/ethutil" "github.com/ethereum/go-ethereum/logger" "github.com/ethereum/go-ethereum/pow" ) +var tt256 = new(big.Int).Exp(big.NewInt(2), big.NewInt(256), big.NewInt(0)) + var powlogger = logger.NewLogger("POW") type DAG struct { SeedBlockNum uint64 dag unsafe.Pointer // full GB of memory for dag + file bool } type ParamsAndCache struct { @@ -59,16 +66,28 @@ func parseNonce(nonce []byte) (uint64, error) { const epochLength uint64 = 30000 -func getSeedBlockNum(blockNum uint64) uint64 { +func GetSeedBlockNum(blockNum uint64) uint64 { + var seedBlockNum uint64 = 0 + if blockNum > epochLength { + seedBlockNum = ((blockNum - 1) / epochLength) * epochLength + } + return seedBlockNum +} + +/* +XXX THIS DOESN'T WORK!! NEEDS FIXING +blockEpoch will underflow and wrap around causing massive issues +func GetSeedBlockNum(blockNum uint64) uint64 { var seedBlockNum uint64 = 0 - if blockNum >= 2*epochLength { - seedBlockNum = ((blockNum / epochLength) - 1) * epochLength + if blockNum > epochLength { + seedBlockNum = ((blockNum - 1) / epochLength) * epochLength } return seedBlockNum } +*/ func makeParamsAndCache(chainManager pow.ChainManager, blockNum uint64) *ParamsAndCache { - seedBlockNum := getSeedBlockNum(blockNum) + seedBlockNum := GetSeedBlockNum(blockNum) paramsAndCache := &ParamsAndCache{ params: new(C.ethash_params), cache: new(C.ethash_cache), @@ -76,19 +95,19 @@ func makeParamsAndCache(chainManager pow.ChainManager, blockNum uint64) *ParamsA } C.ethash_params_init(paramsAndCache.params, C.uint32_t(seedBlockNum)) paramsAndCache.cache.mem = C.malloc(paramsAndCache.params.cache_size) - seedHash := chainManager.GetBlockByNumber(seedBlockNum).Header().Hash() - log.Println("Params", paramsAndCache.params) + seedHash := chainManager.GetBlockByNumber(seedBlockNum).SeedHash() log.Println("Making Cache") start := time.Now() - C.ethash_mkcache(paramsAndCache.cache, paramsAndCache.params, (*C.uint8_t)((unsafe.Pointer)(&seedHash[0]))) + C.ethash_mkcache(paramsAndCache.cache, paramsAndCache.params, (*C.uint8_t)(unsafe.Pointer(&seedHash[0]))) log.Println("Took:", time.Since(start)) + return paramsAndCache } func (pow *Ethash) updateCache() { pow.cacheMutex.Lock() - seedNum := getSeedBlockNum(pow.chainManager.CurrentBlock().NumberU64()) + seedNum := GetSeedBlockNum(pow.chainManager.CurrentBlock().NumberU64()) if pow.paramsAndCache.SeedBlockNum != seedNum { pow.paramsAndCache = makeParamsAndCache(pow.chainManager, pow.chainManager.CurrentBlock().NumberU64()) } @@ -104,17 +123,66 @@ func makeDAG(p *ParamsAndCache) *DAG { return d } -func (pow *Ethash) updateDAG() { +func (pow *Ethash) writeDagToDisk(dag *DAG, seedNum uint64) *os.File { + data := C.GoBytes(unsafe.Pointer(dag.dag), C.int(pow.paramsAndCache.params.full_size)) + file, err := os.Create("/tmp/dag") + if err != nil { + panic(err) + } + + num := make([]byte, 8) + binary.BigEndian.PutUint64(num, seedNum) + + file.Write(num) + file.Write(data) + + return file +} + +func (pow *Ethash) UpdateDAG() { pow.cacheMutex.Lock() pow.dagMutex.Lock() - seedNum := getSeedBlockNum(pow.chainManager.CurrentBlock().NumberU64()) + seedNum := GetSeedBlockNum(pow.chainManager.CurrentBlock().NumberU64()) if pow.dag == nil || pow.dag.SeedBlockNum != seedNum { + if pow.dag != nil && pow.dag.dag != nil { + C.free(pow.dag.dag) + pow.dag.dag = nil + } + + path := path.Join("/", "tmp", "dag") pow.dag = nil - log.Println("Making Dag") + log.Println("Generating dag") start := time.Now() - pow.dag = makeDAG(pow.paramsAndCache) + + file, err := os.Open(path) + if err != nil { + log.Printf("No dag found in '%s'. Generating new dago(takes a while)...", path) + pow.dag = makeDAG(pow.paramsAndCache) + file = pow.writeDagToDisk(pow.dag, seedNum) + } else { + data, err := ioutil.ReadAll(file) + if err != nil { + panic(err) + } + + num := binary.BigEndian.Uint64(data[0:8]) + if num < seedNum { + log.Printf("Old found. Generating new dag (takes a while)...") + pow.dag = makeDAG(pow.paramsAndCache) + file = pow.writeDagToDisk(pow.dag, seedNum) + } else { + data = data[8:] + pow.dag = &DAG{ + dag: unsafe.Pointer(&data[0]), + file: true, + SeedBlockNum: pow.paramsAndCache.SeedBlockNum, + } + } + } log.Println("Took:", time.Since(start)) + + file.Close() } pow.dagMutex.Unlock() @@ -123,11 +191,10 @@ func (pow *Ethash) updateDAG() { func New(chainManager pow.ChainManager) *Ethash { return &Ethash{ - turbo: false, + turbo: true, paramsAndCache: makeParamsAndCache(chainManager, chainManager.CurrentBlock().NumberU64()), chainManager: chainManager, dag: nil, - ret: new(C.ethash_return_value), cacheMutex: new(sync.Mutex), dagMutex: new(sync.Mutex), } @@ -142,73 +209,70 @@ func (pow *Ethash) CacheSize() uint64 { } func (pow *Ethash) GetSeedHash(blockNum uint64) []byte { - return pow.chainManager.GetBlockByNumber(getSeedBlockNum(blockNum)).Header().Hash() + seednum := GetSeedBlockNum(blockNum) + return pow.chainManager.GetBlockByNumber(seednum).SeedHash() } func (pow *Ethash) Stop() { pow.cacheMutex.Lock() pow.dagMutex.Lock() + defer pow.dagMutex.Unlock() + defer pow.cacheMutex.Unlock() + if pow.paramsAndCache.cache != nil { C.free(pow.paramsAndCache.cache.mem) } - if pow.dag != nil { + if pow.dag.dag != nil && !pow.dag.file { C.free(pow.dag.dag) } - pow.dagMutex.Unlock() - pow.cacheMutex.Unlock() + pow.dag.dag = nil } -func (pow *Ethash) Search(block pow.Block, stop <-chan struct{}) ([]byte, []byte, []byte) { - pow.updateDAG() +func (pow *Ethash) Search(block pow.Block, stop <-chan struct{}) (uint64, []byte, []byte) { + //pow.UpdateDAG() // Not very elegant, multiple mining instances are not supported - pow.dagMutex.Lock() - pow.cacheMutex.Lock() - defer pow.cacheMutex.Unlock() - defer pow.dagMutex.Unlock() + //pow.dagMutex.Lock() + //pow.cacheMutex.Lock() + //defer pow.cacheMutex.Unlock() + //defer pow.dagMutex.Unlock() r := rand.New(rand.NewSource(time.Now().UnixNano())) miningHash := block.HashNoNonce() diff := block.Difficulty() - log.Println("difficulty", diff) + i := int64(0) + starti := i start := time.Now().UnixNano() - t := time.Now() nonce := uint64(r.Int63()) + cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash[0])) + target := new(big.Int).Div(tt256, diff) + var ret C.ethash_return_value for { select { case <-stop: powlogger.Infoln("Breaking from mining") pow.HashRate = 0 - pow.dagMutex.Unlock() - return nil, nil, nil + return 0, nil, nil default: i++ - if time.Since(t) > (1 * time.Second) { - elapsed := time.Now().UnixNano() - start - hashes := ((float64(1e9) / float64(elapsed)) * float64(i)) / 1000 - pow.HashRate = int64(hashes) - powlogger.Infoln("Hashing @", pow.HashRate, "khash") + elapsed := time.Now().UnixNano() - start + hashes := ((float64(1e9) / float64(elapsed)) * float64(i-starti)) / 1000 + pow.HashRate = int64(hashes) - t = time.Now() - } + C.ethash_full(&ret, pow.dag.dag, pow.paramsAndCache.params, cMiningHash, C.uint64_t(nonce)) + result := ethutil.Bytes2Big(C.GoBytes(unsafe.Pointer(&ret.result[0]), C.int(32))) + + if result.Cmp(target) <= 0 { + mixDigest := C.GoBytes(unsafe.Pointer(&ret.mix_hash[0]), C.int(32)) + + return nonce, mixDigest, pow.GetSeedHash(block.NumberU64()) - cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash)) - cnonce := C.uint64_t(nonce) - log.Printf("seed hash, nonce: %x %x\n", miningHash, nonce) - // pow.hash is the output/return of ethash_full - C.ethash_full(pow.ret, pow.dag.dag, pow.paramsAndCache.params, cMiningHash, cnonce) - res := C.ethash_check_difficulty((*C.uint8_t)(&pow.ret.result[0]), (*C.uint8_t)(unsafe.Pointer(&diff.Bytes()[0]))) - if res == 1 { - mixDigest := C.GoBytes(unsafe.Pointer(&pow.ret.mix_hash[0]), 32) - // We don't really nead 32 bytes here - buf := make([]byte, 32) - binary.PutUvarint(buf, nonce) - return buf, mixDigest, pow.GetSeedHash(block.NumberU64()) } + nonce += 1 } @@ -216,43 +280,32 @@ func (pow *Ethash) Search(block pow.Block, stop <-chan struct{}) ([]byte, []byte time.Sleep(20 * time.Microsecond) } } + } func (pow *Ethash) Verify(block pow.Block) bool { // Make sure the SeedHash is set correctly if bytes.Compare(block.SeedHash(), pow.GetSeedHash(block.NumberU64())) != 0 { - log.Println("Block had wrong SeedHash") - log.Println("Expected: ", pow.GetSeedHash(block.NumberU64())) - log.Println("Actual: ", block.SeedHash()) return false } - nonceInt, err := parseNonce(block.Nonce()) - if err != nil { - log.Println("nonce to int err:", err) - return false - } - return pow.verify(block.HashNoNonce(), block.MixDigest(), block.Difficulty(), block.NumberU64(), nonceInt) + return pow.verify(block.HashNoNonce(), block.MixDigest(), block.Difficulty(), block.NumberU64(), block.Nonce()) } func (pow *Ethash) verify(hash []byte, mixDigest []byte, difficulty *big.Int, blockNum uint64, nonce uint64) bool { + fmt.Printf("%x\n%d\n%x\n%x\n", hash, nonce, mixDigest, difficulty.Bytes()) // First check: make sure header, mixDigest, nonce are correct without hitting the DAG // This is to prevent DOS attacks - chash := (*C.uint8_t)(unsafe.Pointer(&hash)) + chash := (*C.uint8_t)(unsafe.Pointer(&hash[0])) cnonce := C.uint64_t(nonce) - cmixDigest := (*C.uint8_t)(unsafe.Pointer(&mixDigest)) - cdifficulty := (*C.uint8_t)(unsafe.Pointer(&difficulty.Bytes()[0])) - if C.ethash_quick_check_difficulty(chash, cnonce, cmixDigest, cdifficulty) != 1 { - log.Println("Failed to pass quick check. Are you sure that the mix digest is correct?") - return false - } + target := new(big.Int).Div(tt256, difficulty) var pAc *ParamsAndCache // If its an old block (doesn't use the current cache) // get the cache for it but don't update (so we don't need the mutex) // Otherwise, it's the current block or a future. // If current, updateCache will do nothing. - if getSeedBlockNum(blockNum) < pow.paramsAndCache.SeedBlockNum { + if GetSeedBlockNum(blockNum) < pow.paramsAndCache.SeedBlockNum { pAc = makeParamsAndCache(pow.chainManager, blockNum) } else { pow.updateCache() @@ -261,9 +314,12 @@ func (pow *Ethash) verify(hash []byte, mixDigest []byte, difficulty *big.Int, bl pAc = pow.paramsAndCache } - C.ethash_light(pow.ret, pAc.cache, pAc.params, chash, cnonce) - res := C.ethash_check_difficulty((*C.uint8_t)(unsafe.Pointer(&pow.ret.result[0])), cdifficulty) - return res == 1 + ret := new(C.ethash_return_value) + + C.ethash_light(ret, pAc.cache, pAc.params, chash, cnonce) + + result := ethutil.Bytes2Big(C.GoBytes(unsafe.Pointer(&ret.result[0]), C.int(32))) + return result.Cmp(target) <= 0 } func (pow *Ethash) GetHashrate() int64 { @@ -275,22 +331,23 @@ func (pow *Ethash) Turbo(on bool) { } func (pow *Ethash) FullHash(nonce uint64, miningHash []byte) []byte { - pow.updateDAG() + pow.UpdateDAG() pow.dagMutex.Lock() defer pow.dagMutex.Unlock() - cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash)) + cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash[0])) cnonce := C.uint64_t(nonce) - log.Println("seed hash, nonce:", miningHash, nonce) + ret := new(C.ethash_return_value) // pow.hash is the output/return of ethash_full - C.ethash_full(pow.ret, pow.dag.dag, pow.paramsAndCache.params, cMiningHash, cnonce) - ghash_full := C.GoBytes(unsafe.Pointer(&pow.ret.result[0]), 32) + C.ethash_full(ret, pow.dag.dag, pow.paramsAndCache.params, cMiningHash, cnonce) + ghash_full := C.GoBytes(unsafe.Pointer(&ret.result), 32) return ghash_full } func (pow *Ethash) LightHash(nonce uint64, miningHash []byte) []byte { - cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash)) + cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash[0])) cnonce := C.uint64_t(nonce) - C.ethash_light(pow.ret, pow.paramsAndCache.cache, pow.paramsAndCache.params, cMiningHash, cnonce) - ghash_light := C.GoBytes(unsafe.Pointer(&pow.ret.result[0]), 32) + ret := new(C.ethash_return_value) + C.ethash_light(ret, pow.paramsAndCache.cache, pow.paramsAndCache.params, cMiningHash, cnonce) + ghash_light := C.GoBytes(unsafe.Pointer(&ret.result), 32) return ghash_light } diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/js/LICENSE b/Godeps/_workspace/src/github.com/ethereum/ethash/js/LICENSE new file mode 100644 index 000000000..070be633d --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/js/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Tim Hughes + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/js/ethash.js b/Godeps/_workspace/src/github.com/ethereum/ethash/js/ethash.js new file mode 100644 index 000000000..bec1284f6 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/js/ethash.js @@ -0,0 +1,190 @@ +// ethash.js +// Tim Hughes +// Revision 19 + +/*jslint node: true, shadow:true */ +"use strict"; + +var Keccak = require('./keccak'); +var util = require('./util'); + +// 32-bit unsigned modulo +function mod32(x, n) +{ + return (x>>>0) % (n>>>0); +} + +function fnv(x, y) +{ + // js integer multiply by 0x01000193 will lose precision + return ((x*0x01000000 | 0) + (x*0x193 | 0)) ^ y; +} + +function computeCache(params, seedWords) +{ + var cache = new Uint32Array(params.cacheSize >> 2); + var cacheNodeCount = params.cacheSize >> 6; + + // Initialize cache + var keccak = new Keccak(); + keccak.digestWords(cache, 0, 16, seedWords, 0, seedWords.length); + for (var n = 1; n < cacheNodeCount; ++n) + { + keccak.digestWords(cache, n<<4, 16, cache, (n-1)<<4, 16); + } + + var tmp = new Uint32Array(16); + + // Do randmemohash passes + for (var r = 0; r < params.cacheRounds; ++r) + { + for (var n = 0; n < cacheNodeCount; ++n) + { + var p0 = mod32(n + cacheNodeCount - 1, cacheNodeCount) << 4; + var p1 = mod32(cache[n<<4|0], cacheNodeCount) << 4; + + for (var w = 0; w < 16; w=(w+1)|0) + { + tmp[w] = cache[p0 | w] ^ cache[p1 | w]; + } + + keccak.digestWords(cache, n<<4, 16, tmp, 0, tmp.length); + } + } + return cache; +} + +function computeDagNode(o_node, params, cache, keccak, nodeIndex) +{ + var cacheNodeCount = params.cacheSize >> 6; + var dagParents = params.dagParents; + + var c = (nodeIndex % cacheNodeCount) << 4; + var mix = o_node; + for (var w = 0; w < 16; ++w) + { + mix[w] = cache[c|w]; + } + mix[0] ^= nodeIndex; + keccak.digestWords(mix, 0, 16, mix, 0, 16); + + for (var p = 0; p < dagParents; ++p) + { + // compute cache node (word) index + c = mod32(fnv(nodeIndex ^ p, mix[p&15]), cacheNodeCount) << 4; + + for (var w = 0; w < 16; ++w) + { + mix[w] = fnv(mix[w], cache[c|w]); + } + } + + keccak.digestWords(mix, 0, 16, mix, 0, 16); +} + +function computeHashInner(mix, params, cache, keccak, tempNode) +{ + var mixParents = params.mixParents|0; + var mixWordCount = params.mixSize >> 2; + var mixNodeCount = mixWordCount >> 4; + var dagPageCount = (params.dagSize / params.mixSize) >> 0; + + // grab initial first word + var s0 = mix[0]; + + // initialise mix from initial 64 bytes + for (var w = 16; w < mixWordCount; ++w) + { + mix[w] = mix[w & 15]; + } + + for (var a = 0; a < mixParents; ++a) + { + var p = mod32(fnv(s0 ^ a, mix[a & (mixWordCount-1)]), dagPageCount); + var d = (p * mixNodeCount)|0; + + for (var n = 0, w = 0; n < mixNodeCount; ++n, w += 16) + { + computeDagNode(tempNode, params, cache, keccak, (d + n)|0); + + for (var v = 0; v < 16; ++v) + { + mix[w|v] = fnv(mix[w|v], tempNode[v]); + } + } + } +} + +function convertSeed(seed) +{ + // todo, reconcile with spec, byte ordering? + // todo, big-endian conversion + var newSeed = util.toWords(seed); + if (newSeed === null) + throw Error("Invalid seed '" + seed + "'"); + return newSeed; +} + +exports.defaultParams = function() +{ + return { + cacheSize: 1048384, + cacheRounds: 3, + dagSize: 1073739904, + dagParents: 256, + mixSize: 128, + mixParents: 64, + }; +}; + +exports.Ethash = function(params, seed) +{ + // precompute cache and related values + seed = convertSeed(seed); + var cache = computeCache(params, seed); + + // preallocate buffers/etc + var initBuf = new ArrayBuffer(96); + var initBytes = new Uint8Array(initBuf); + var initWords = new Uint32Array(initBuf); + var mixWords = new Uint32Array(params.mixSize / 4); + var tempNode = new Uint32Array(16); + var keccak = new Keccak(); + + var retWords = new Uint32Array(8); + var retBytes = new Uint8Array(retWords.buffer); // supposedly read-only + + this.hash = function(header, nonce) + { + // compute initial hash + initBytes.set(header, 0); + initBytes.set(nonce, 32); + keccak.digestWords(initWords, 0, 16, initWords, 0, 8 + nonce.length/4); + + // compute mix + for (var i = 0; i != 16; ++i) + { + mixWords[i] = initWords[i]; + } + computeHashInner(mixWords, params, cache, keccak, tempNode); + + // compress mix and append to initWords + for (var i = 0; i != mixWords.length; i += 4) + { + initWords[16 + i/4] = fnv(fnv(fnv(mixWords[i], mixWords[i+1]), mixWords[i+2]), mixWords[i+3]); + } + + // final Keccak hashes + keccak.digestWords(retWords, 0, 8, initWords, 0, 24); // Keccak-256(s + cmix) + return retBytes; + }; + + this.cacheDigest = function() + { + return keccak.digest(32, new Uint8Array(cache.buffer)); + }; +}; + + + + diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/js/keccak.js b/Godeps/_workspace/src/github.com/ethereum/ethash/js/keccak.js new file mode 100644 index 000000000..84ddde645 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/js/keccak.js @@ -0,0 +1,404 @@ +// keccak.js +// Tim Hughes +// derived from Markku-Juhani O. Saarinen's C code (http://keccak.noekeon.org/readable_code.html) + +/*jslint node: true, shadow:true */ +"use strict"; + +var Keccak_f1600_RC = new Uint32Array([ + 0x00000001, 0x00000000, + 0x00008082, 0x00000000, + 0x0000808a, 0x80000000, + 0x80008000, 0x80000000, + 0x0000808b, 0x00000000, + 0x80000001, 0x00000000, + 0x80008081, 0x80000000, + 0x00008009, 0x80000000, + 0x0000008a, 0x00000000, + 0x00000088, 0x00000000, + 0x80008009, 0x00000000, + 0x8000000a, 0x00000000, + 0x8000808b, 0x00000000, + 0x0000008b, 0x80000000, + 0x00008089, 0x80000000, + 0x00008003, 0x80000000, + 0x00008002, 0x80000000, + 0x00000080, 0x80000000, + 0x0000800a, 0x00000000, + 0x8000000a, 0x80000000, + 0x80008081, 0x80000000, + 0x00008080, 0x80000000, + 0x80000001, 0x00000000, + 0x80008008, 0x80000000 +]); + +function keccak_f1600(outState, outOffset, outSize, inState) +{ + // todo, handle big endian loads + var a00l = inState[0]|0; + var a00h = inState[1]|0; + var a01l = inState[2]|0; + var a01h = inState[3]|0; + var a02l = inState[4]|0; + var a02h = inState[5]|0; + var a03l = inState[6]|0; + var a03h = inState[7]|0; + var a04l = inState[8]|0; + var a04h = inState[9]|0; + var a05l = inState[10]|0; + var a05h = inState[11]|0; + var a06l = inState[12]|0; + var a06h = inState[13]|0; + var a07l = inState[14]|0; + var a07h = inState[15]|0; + var a08l = inState[16]|0; + var a08h = inState[17]|0; + var a09l = inState[18]|0; + var a09h = inState[19]|0; + var a10l = inState[20]|0; + var a10h = inState[21]|0; + var a11l = inState[22]|0; + var a11h = inState[23]|0; + var a12l = inState[24]|0; + var a12h = inState[25]|0; + var a13l = inState[26]|0; + var a13h = inState[27]|0; + var a14l = inState[28]|0; + var a14h = inState[29]|0; + var a15l = inState[30]|0; + var a15h = inState[31]|0; + var a16l = inState[32]|0; + var a16h = inState[33]|0; + var a17l = inState[34]|0; + var a17h = inState[35]|0; + var a18l = inState[36]|0; + var a18h = inState[37]|0; + var a19l = inState[38]|0; + var a19h = inState[39]|0; + var a20l = inState[40]|0; + var a20h = inState[41]|0; + var a21l = inState[42]|0; + var a21h = inState[43]|0; + var a22l = inState[44]|0; + var a22h = inState[45]|0; + var a23l = inState[46]|0; + var a23h = inState[47]|0; + var a24l = inState[48]|0; + var a24h = inState[49]|0; + var b00l, b00h, b01l, b01h, b02l, b02h, b03l, b03h, b04l, b04h; + var b05l, b05h, b06l, b06h, b07l, b07h, b08l, b08h, b09l, b09h; + var b10l, b10h, b11l, b11h, b12l, b12h, b13l, b13h, b14l, b14h; + var b15l, b15h, b16l, b16h, b17l, b17h, b18l, b18h, b19l, b19h; + var b20l, b20h, b21l, b21h, b22l, b22h, b23l, b23h, b24l, b24h; + var tl, nl; + var th, nh; + + for (var r = 0; r < 48; r = (r+2)|0) + { + // Theta + b00l = a00l ^ a05l ^ a10l ^ a15l ^ a20l; + b00h = a00h ^ a05h ^ a10h ^ a15h ^ a20h; + b01l = a01l ^ a06l ^ a11l ^ a16l ^ a21l; + b01h = a01h ^ a06h ^ a11h ^ a16h ^ a21h; + b02l = a02l ^ a07l ^ a12l ^ a17l ^ a22l; + b02h = a02h ^ a07h ^ a12h ^ a17h ^ a22h; + b03l = a03l ^ a08l ^ a13l ^ a18l ^ a23l; + b03h = a03h ^ a08h ^ a13h ^ a18h ^ a23h; + b04l = a04l ^ a09l ^ a14l ^ a19l ^ a24l; + b04h = a04h ^ a09h ^ a14h ^ a19h ^ a24h; + tl = b04l ^ (b01l << 1 | b01h >>> 31); + th = b04h ^ (b01h << 1 | b01l >>> 31); + a00l ^= tl; + a00h ^= th; + a05l ^= tl; + a05h ^= th; + a10l ^= tl; + a10h ^= th; + a15l ^= tl; + a15h ^= th; + a20l ^= tl; + a20h ^= th; + tl = b00l ^ (b02l << 1 | b02h >>> 31); + th = b00h ^ (b02h << 1 | b02l >>> 31); + a01l ^= tl; + a01h ^= th; + a06l ^= tl; + a06h ^= th; + a11l ^= tl; + a11h ^= th; + a16l ^= tl; + a16h ^= th; + a21l ^= tl; + a21h ^= th; + tl = b01l ^ (b03l << 1 | b03h >>> 31); + th = b01h ^ (b03h << 1 | b03l >>> 31); + a02l ^= tl; + a02h ^= th; + a07l ^= tl; + a07h ^= th; + a12l ^= tl; + a12h ^= th; + a17l ^= tl; + a17h ^= th; + a22l ^= tl; + a22h ^= th; + tl = b02l ^ (b04l << 1 | b04h >>> 31); + th = b02h ^ (b04h << 1 | b04l >>> 31); + a03l ^= tl; + a03h ^= th; + a08l ^= tl; + a08h ^= th; + a13l ^= tl; + a13h ^= th; + a18l ^= tl; + a18h ^= th; + a23l ^= tl; + a23h ^= th; + tl = b03l ^ (b00l << 1 | b00h >>> 31); + th = b03h ^ (b00h << 1 | b00l >>> 31); + a04l ^= tl; + a04h ^= th; + a09l ^= tl; + a09h ^= th; + a14l ^= tl; + a14h ^= th; + a19l ^= tl; + a19h ^= th; + a24l ^= tl; + a24h ^= th; + + // Rho Pi + b00l = a00l; + b00h = a00h; + b10l = a01l << 1 | a01h >>> 31; + b10h = a01h << 1 | a01l >>> 31; + b07l = a10l << 3 | a10h >>> 29; + b07h = a10h << 3 | a10l >>> 29; + b11l = a07l << 6 | a07h >>> 26; + b11h = a07h << 6 | a07l >>> 26; + b17l = a11l << 10 | a11h >>> 22; + b17h = a11h << 10 | a11l >>> 22; + b18l = a17l << 15 | a17h >>> 17; + b18h = a17h << 15 | a17l >>> 17; + b03l = a18l << 21 | a18h >>> 11; + b03h = a18h << 21 | a18l >>> 11; + b05l = a03l << 28 | a03h >>> 4; + b05h = a03h << 28 | a03l >>> 4; + b16l = a05h << 4 | a05l >>> 28; + b16h = a05l << 4 | a05h >>> 28; + b08l = a16h << 13 | a16l >>> 19; + b08h = a16l << 13 | a16h >>> 19; + b21l = a08h << 23 | a08l >>> 9; + b21h = a08l << 23 | a08h >>> 9; + b24l = a21l << 2 | a21h >>> 30; + b24h = a21h << 2 | a21l >>> 30; + b04l = a24l << 14 | a24h >>> 18; + b04h = a24h << 14 | a24l >>> 18; + b15l = a04l << 27 | a04h >>> 5; + b15h = a04h << 27 | a04l >>> 5; + b23l = a15h << 9 | a15l >>> 23; + b23h = a15l << 9 | a15h >>> 23; + b19l = a23h << 24 | a23l >>> 8; + b19h = a23l << 24 | a23h >>> 8; + b13l = a19l << 8 | a19h >>> 24; + b13h = a19h << 8 | a19l >>> 24; + b12l = a13l << 25 | a13h >>> 7; + b12h = a13h << 25 | a13l >>> 7; + b02l = a12h << 11 | a12l >>> 21; + b02h = a12l << 11 | a12h >>> 21; + b20l = a02h << 30 | a02l >>> 2; + b20h = a02l << 30 | a02h >>> 2; + b14l = a20l << 18 | a20h >>> 14; + b14h = a20h << 18 | a20l >>> 14; + b22l = a14h << 7 | a14l >>> 25; + b22h = a14l << 7 | a14h >>> 25; + b09l = a22h << 29 | a22l >>> 3; + b09h = a22l << 29 | a22h >>> 3; + b06l = a09l << 20 | a09h >>> 12; + b06h = a09h << 20 | a09l >>> 12; + b01l = a06h << 12 | a06l >>> 20; + b01h = a06l << 12 | a06h >>> 20; + + // Chi + a00l = b00l ^ ~b01l & b02l; + a00h = b00h ^ ~b01h & b02h; + a01l = b01l ^ ~b02l & b03l; + a01h = b01h ^ ~b02h & b03h; + a02l = b02l ^ ~b03l & b04l; + a02h = b02h ^ ~b03h & b04h; + a03l = b03l ^ ~b04l & b00l; + a03h = b03h ^ ~b04h & b00h; + a04l = b04l ^ ~b00l & b01l; + a04h = b04h ^ ~b00h & b01h; + a05l = b05l ^ ~b06l & b07l; + a05h = b05h ^ ~b06h & b07h; + a06l = b06l ^ ~b07l & b08l; + a06h = b06h ^ ~b07h & b08h; + a07l = b07l ^ ~b08l & b09l; + a07h = b07h ^ ~b08h & b09h; + a08l = b08l ^ ~b09l & b05l; + a08h = b08h ^ ~b09h & b05h; + a09l = b09l ^ ~b05l & b06l; + a09h = b09h ^ ~b05h & b06h; + a10l = b10l ^ ~b11l & b12l; + a10h = b10h ^ ~b11h & b12h; + a11l = b11l ^ ~b12l & b13l; + a11h = b11h ^ ~b12h & b13h; + a12l = b12l ^ ~b13l & b14l; + a12h = b12h ^ ~b13h & b14h; + a13l = b13l ^ ~b14l & b10l; + a13h = b13h ^ ~b14h & b10h; + a14l = b14l ^ ~b10l & b11l; + a14h = b14h ^ ~b10h & b11h; + a15l = b15l ^ ~b16l & b17l; + a15h = b15h ^ ~b16h & b17h; + a16l = b16l ^ ~b17l & b18l; + a16h = b16h ^ ~b17h & b18h; + a17l = b17l ^ ~b18l & b19l; + a17h = b17h ^ ~b18h & b19h; + a18l = b18l ^ ~b19l & b15l; + a18h = b18h ^ ~b19h & b15h; + a19l = b19l ^ ~b15l & b16l; + a19h = b19h ^ ~b15h & b16h; + a20l = b20l ^ ~b21l & b22l; + a20h = b20h ^ ~b21h & b22h; + a21l = b21l ^ ~b22l & b23l; + a21h = b21h ^ ~b22h & b23h; + a22l = b22l ^ ~b23l & b24l; + a22h = b22h ^ ~b23h & b24h; + a23l = b23l ^ ~b24l & b20l; + a23h = b23h ^ ~b24h & b20h; + a24l = b24l ^ ~b20l & b21l; + a24h = b24h ^ ~b20h & b21h; + + // Iota + a00l ^= Keccak_f1600_RC[r|0]; + a00h ^= Keccak_f1600_RC[r|1]; + } + + // todo, handle big-endian stores + outState[outOffset|0] = a00l; + outState[outOffset|1] = a00h; + outState[outOffset|2] = a01l; + outState[outOffset|3] = a01h; + outState[outOffset|4] = a02l; + outState[outOffset|5] = a02h; + outState[outOffset|6] = a03l; + outState[outOffset|7] = a03h; + if (outSize == 8) + return; + outState[outOffset|8] = a04l; + outState[outOffset|9] = a04h; + outState[outOffset|10] = a05l; + outState[outOffset|11] = a05h; + outState[outOffset|12] = a06l; + outState[outOffset|13] = a06h; + outState[outOffset|14] = a07l; + outState[outOffset|15] = a07h; + if (outSize == 16) + return; + outState[outOffset|16] = a08l; + outState[outOffset|17] = a08h; + outState[outOffset|18] = a09l; + outState[outOffset|19] = a09h; + outState[outOffset|20] = a10l; + outState[outOffset|21] = a10h; + outState[outOffset|22] = a11l; + outState[outOffset|23] = a11h; + outState[outOffset|24] = a12l; + outState[outOffset|25] = a12h; + outState[outOffset|26] = a13l; + outState[outOffset|27] = a13h; + outState[outOffset|28] = a14l; + outState[outOffset|29] = a14h; + outState[outOffset|30] = a15l; + outState[outOffset|31] = a15h; + outState[outOffset|32] = a16l; + outState[outOffset|33] = a16h; + outState[outOffset|34] = a17l; + outState[outOffset|35] = a17h; + outState[outOffset|36] = a18l; + outState[outOffset|37] = a18h; + outState[outOffset|38] = a19l; + outState[outOffset|39] = a19h; + outState[outOffset|40] = a20l; + outState[outOffset|41] = a20h; + outState[outOffset|42] = a21l; + outState[outOffset|43] = a21h; + outState[outOffset|44] = a22l; + outState[outOffset|45] = a22h; + outState[outOffset|46] = a23l; + outState[outOffset|47] = a23h; + outState[outOffset|48] = a24l; + outState[outOffset|49] = a24h; +} + +var Keccak = function() +{ + var stateBuf = new ArrayBuffer(200); + var stateBytes = new Uint8Array(stateBuf); + var stateWords = new Uint32Array(stateBuf); + + this.digest = function(oSize, iBytes) + { + for (var i = 0; i < 50; ++i) + { + stateWords[i] = 0; + } + + var r = 200 - oSize*2; + var iLength = iBytes.length; + var iOffset = 0; + for ( ; ;) + { + var len = iLength < r ? iLength : r; + for (i = 0; i < len; ++i, ++iOffset) + { + stateBytes[i] ^= iBytes[iOffset]; + } + + if (iLength < r) + break; + iLength -= len; + + keccak_f1600(stateWords, 0, 50, stateWords); + } + + stateBytes[iLength] ^= 1; + stateBytes[r-1] ^= 0x80; + keccak_f1600(stateWords, 0, 50, stateWords); + return stateBytes.subarray(0, oSize); + }; + + this.digestWords = function(oWords, oOffset, oLength, iWords, iOffset, iLength) + { + for (var i = 0; i < 50; ++i) + { + stateWords[i] = 0; + } + + var r = 50 - oLength*2; + for (; ; ) + { + var len = iLength < r ? iLength : r; + for (i = 0; i < len; ++i, ++iOffset) + { + stateWords[i] ^= iWords[iOffset]; + } + + if (iLength < r) + break; + iLength -= len; + + keccak_f1600(stateWords, 0, 50, stateWords); + } + + stateBytes[iLength<<2] ^= 1; + stateBytes[(r<<2) - 1] ^= 0x80; + keccak_f1600(oWords, oOffset, oLength, stateWords); + }; +}; + +module.exports = Keccak; + + diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/js/makekeccak.js b/Godeps/_workspace/src/github.com/ethereum/ethash/js/makekeccak.js new file mode 100644 index 000000000..c4db2b80a --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/js/makekeccak.js @@ -0,0 +1,201 @@ +#!/usr/bin/env node +// makekeccak.js +// Tim Hughes + +/*jslint node: true, shadow:true */ +"use strict"; + +var Keccak_f1600_Rho = [ + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 +]; + +var Keccak_f1600_Pi= [ + 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 +]; + +var Keccak_f1600_RC = [ + 0x00000001, 0x00000000, + 0x00008082, 0x00000000, + 0x0000808a, 0x80000000, + 0x80008000, 0x80000000, + 0x0000808b, 0x00000000, + 0x80000001, 0x00000000, + 0x80008081, 0x80000000, + 0x00008009, 0x80000000, + 0x0000008a, 0x00000000, + 0x00000088, 0x00000000, + 0x80008009, 0x00000000, + 0x8000000a, 0x00000000, + 0x8000808b, 0x00000000, + 0x0000008b, 0x80000000, + 0x00008089, 0x80000000, + 0x00008003, 0x80000000, + 0x00008002, 0x80000000, + 0x00000080, 0x80000000, + 0x0000800a, 0x00000000, + 0x8000000a, 0x80000000, + 0x80008081, 0x80000000, + 0x00008080, 0x80000000, + 0x80000001, 0x00000000, + 0x80008008, 0x80000000, +]; + +function makeRotLow(lo, hi, n) +{ + if (n === 0 || n === 32) throw Error("unsupported"); + if ((n & 0x20) !== 0) + { + n &= ~0x20; + var t = hi; + hi = lo; + lo = t; + } + var hir = hi + " >>> " + (32 - n); + var los = lo + " << " + n; + return los + " | " + hir; +} + +function makeRotHigh(lo, hi, n) +{ + if (n === 0 || n === 32) throw Error("unsupported"); + if ((n & 0x20) !== 0) + { + n &= ~0x20; + var t = hi; + hi = lo; + lo = t; + } + var his = hi + " << " + n; + var lor = lo + " >>> " + (32 - n); + return his + " | " + lor; +} + +function makeKeccak_f1600() +{ + var format = function(n) + { + return n < 10 ? "0"+n : ""+n; + }; + + var a = function(n, w) + { + return "a" + format(n) + (w !== 0?'h':'l'); + }; + + var b = function(n, w) + { + return "b" + format(n) + (w !== 0?'h':'l'); + }; + + var str = ""; + str += "function keccak_f1600(outState, outOffset, outSize, inState)\n"; + str += "{\n"; + + for (var i = 0; i < 25; ++i) + { + for (var w = 0; w <= 1; ++w) + { + str += "\tvar " + a(i,w) + " = inState["+(i<<1|w)+"]|0;\n"; + } + } + + for (var j = 0; j < 5; ++j) + { + str += "\tvar "; + for (var i = 0; i < 5; ++i) + { + if (i !== 0) + str += ", "; + str += b(j*5+i,0) + ", " + b(j*5+i,1); + } + str += ";\n"; + } + + str += "\tvar tl, th;\n"; + str += "\n"; + str += "\tfor (var r = 0; r < 48; r = (r+2)|0)\n"; + str += "\t{\n"; + + + // Theta + str += "\t\t// Theta\n"; + for (var i = 0; i < 5; ++i) + { + for (var w = 0; w <= 1; ++w) + { + str += "\t\t" + b(i,w) + " = " + a(i,w) + " ^ " + a(i+5,w) + " ^ " + a(i+10,w) + " ^ " + a(i+15,w) + " ^ " + a(i+20,w) + ";\n"; + } + } + + for (var i = 0; i < 5; ++i) + { + var i4 = (i + 4) % 5; + var i1 = (i + 1) % 5; + str += "\t\ttl = " + b(i4,0) + " ^ (" + b(i1,0) + " << 1 | " + b(i1,1) + " >>> 31);\n"; + str += "\t\tth = " + b(i4,1) + " ^ (" + b(i1,1) + " << 1 | " + b(i1,0) + " >>> 31);\n"; + + for (var j = 0; j < 25; j = (j+5)|0) + { + str += "\t\t" + a((j+i),0) + " ^= tl;\n"; + str += "\t\t" + a((j+i),1) + " ^= th;\n"; + } + } + + + // Rho Pi + str += "\n\t\t// Rho Pi\n"; + for (var w = 0; w <= 1; ++w) + { + str += "\t\t" + b(0,w) + " = " + a(0,w) + ";\n"; + } + var opi = 1; + for (var i = 0; i < 24; ++i) + { + var pi = Keccak_f1600_Pi[i]; + str += "\t\t" + b(pi,0) + " = " + makeRotLow(a(opi,0), a(opi,1), Keccak_f1600_Rho[i]) + ";\n"; + str += "\t\t" + b(pi,1) + " = " + makeRotHigh(a(opi,0), a(opi,1), Keccak_f1600_Rho[i]) + ";\n"; + opi = pi; + } + + // Chi + str += "\n\t\t// Chi\n"; + for (var j = 0; j < 25; j += 5) + { + for (var i = 0; i < 5; ++i) + { + for (var w = 0; w <= 1; ++w) + { + str += "\t\t" + a(j+i,w) + " = " + b(j+i,w) + " ^ ~" + b(j+(i+1)%5,w) + " & " + b(j+(i+2)%5,w) + ";\n"; + } + } + } + + // Iota + str += "\n\t\t// Iota\n"; + for (var w = 0; w <= 1; ++w) + { + str += "\t\t" + a(0,w) + " ^= Keccak_f1600_RC[r|" + w + "];\n"; + } + + + str += "\t}\n"; + + for (var i = 0; i < 25; ++i) + { + if (i == 4 || i == 8) + { + str += "\tif (outSize == " + i*2 + ")\n\t\treturn;\n"; + } + for (var w = 0; w <= 1; ++w) + { + str += "\toutState[outOffset|"+(i<<1|w)+"] = " + a(i,w) + ";\n"; + } + } + str += "}\n"; + + return str; +} + +console.log(makeKeccak_f1600()); diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/js/test.js b/Godeps/_workspace/src/github.com/ethereum/ethash/js/test.js new file mode 100644 index 000000000..7ebb733ff --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/js/test.js @@ -0,0 +1,53 @@ +// test.js +// Tim Hughes + +/*jslint node: true, shadow:true */ +"use strict"; + +var ethash = require('./ethash'); +var util = require('./util'); +var Keccak = require('./keccak'); + +// sanity check hash functions +var src = util.stringToBytes(""); +if (util.bytesToHexString(new Keccak().digest(32, src)) != "c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470") throw Error("Keccak-256 failed"); +if (util.bytesToHexString(new Keccak().digest(64, src)) != "0eab42de4c3ceb9235fc91acffe746b29c29a8c366b7c60e4e67c466f36a4304c00fa9caf9d87976ba469bcbe06713b435f091ef2769fb160cdab33d3670680e") throw Error("Keccak-512 failed"); + +src = new Uint32Array(src.buffer); +var dst = new Uint32Array(8); +new Keccak().digestWords(dst, 0, dst.length, src, 0, src.length); +if (util.wordsToHexString(dst) != "c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470") throw Error("Keccak-256 Fast failed"); + +var dst = new Uint32Array(16); +new Keccak().digestWords(dst, 0, dst.length, src, 0, src.length); +if (util.wordsToHexString(dst) != "0eab42de4c3ceb9235fc91acffe746b29c29a8c366b7c60e4e67c466f36a4304c00fa9caf9d87976ba469bcbe06713b435f091ef2769fb160cdab33d3670680e") throw Error("Keccak-512 Fast failed"); + + +// init params +var ethashParams = ethash.defaultParams(); +//ethashParams.cacheRounds = 0; + +// create hasher +var seed = util.hexStringToBytes("9410b944535a83d9adf6bbdcc80e051f30676173c16ca0d32d6f1263fc246466") +var startTime = new Date().getTime(); +var hasher = new ethash.Ethash(ethashParams, seed); +console.log('Ethash startup took: '+(new Date().getTime() - startTime) + "ms"); +console.log('Ethash cache hash: ' + util.bytesToHexString(hasher.cacheDigest())); + +var testHexString = "c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470"; +if (testHexString != util.bytesToHexString(util.hexStringToBytes(testHexString))) + throw Error("bytesToHexString or hexStringToBytes broken"); + + +var header = util.hexStringToBytes("c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470"); +var nonce = util.hexStringToBytes("0000000000000000"); +var hash; + +startTime = new Date().getTime(); +var trials = 10; +for (var i = 0; i < trials; ++i) +{ + hash = hasher.hash(header, nonce); +} +console.log("Light client hashes averaged: " + (new Date().getTime() - startTime)/trials + "ms"); +console.log("Hash = " + util.bytesToHexString(hash)); diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/js/util.js b/Godeps/_workspace/src/github.com/ethereum/ethash/js/util.js new file mode 100644 index 000000000..79743cd91 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/js/util.js @@ -0,0 +1,100 @@ +// util.js +// Tim Hughes + +/*jslint node: true, shadow:true */ +"use strict"; + +function nibbleToChar(nibble) +{ + return String.fromCharCode((nibble < 10 ? 48 : 87) + nibble); +} + +function charToNibble(chr) +{ + if (chr >= 48 && chr <= 57) + { + return chr - 48; + } + if (chr >= 65 && chr <= 70) + { + return chr - 65 + 10; + } + if (chr >= 97 && chr <= 102) + { + return chr - 97 + 10; + } + return 0; +} + +function stringToBytes(str) +{ + var bytes = new Uint8Array(str.length); + for (var i = 0; i != str.length; ++i) + { + bytes[i] = str.charCodeAt(i); + } + return bytes; +} + +function hexStringToBytes(str) +{ + var bytes = new Uint8Array(str.length>>>1); + for (var i = 0; i != bytes.length; ++i) + { + bytes[i] = charToNibble(str.charCodeAt(i<<1 | 0)) << 4; + bytes[i] |= charToNibble(str.charCodeAt(i<<1 | 1)); + } + return bytes; +} + +function bytesToHexString(bytes) +{ + var str = ""; + for (var i = 0; i != bytes.length; ++i) + { + str += nibbleToChar(bytes[i] >>> 4); + str += nibbleToChar(bytes[i] & 0xf); + } + return str; +} + +function wordsToHexString(words) +{ + return bytesToHexString(new Uint8Array(words.buffer)); +} + +function uint32ToHexString(num) +{ + var buf = new Uint8Array(4); + buf[0] = (num >> 24) & 0xff; + buf[1] = (num >> 16) & 0xff; + buf[2] = (num >> 8) & 0xff; + buf[3] = (num >> 0) & 0xff; + return bytesToHexString(buf); +} + +function toWords(input) +{ + if (input instanceof Uint32Array) + { + return input; + } + else if (input instanceof Uint8Array) + { + var tmp = new Uint8Array((input.length + 3) & ~3); + tmp.set(input); + return new Uint32Array(tmp.buffer); + } + else if (typeof input === typeof "") + { + return toWords(stringToBytes(input)); + } + return null; +} + +exports.stringToBytes = stringToBytes; +exports.hexStringToBytes = hexStringToBytes; +exports.bytesToHexString = bytesToHexString; +exports.wordsToHexString = wordsToHexString; +exports.uint32ToHexString = uint32ToHexString; +exports.toWords = toWords; \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt deleted file mode 100644 index 19d2fecbf..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -set(LIBRARY ethash-cl) -set(CMAKE_BUILD_TYPE Release) - -if (NOT OPENCL_FOUND) - find_package(OpenCL) -endif() -if (OPENCL_FOUND) - include_directories(${OPENCL_INCLUDE_DIRS}) - include_directories(..) - add_library(${LIBRARY} ethash_cl_miner.cpp ethash_cl_miner.h) - TARGET_LINK_LIBRARIES(${LIBRARY} ${OPENCL_LIBRARIES} ethash) -endif() \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp deleted file mode 100644 index 38fac1962..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp +++ /dev/null @@ -1,12452 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2013 The Khronos Group Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - ******************************************************************************/ - -/*! \file - * - * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and - * OpenCL 1.2 (rev 15) - * \author Benedict R. Gaster, Laurent Morichetti and Lee Howes - * - * Additions and fixes from: - * Brian Cole, March 3rd 2010 and April 2012 - * Matt Gruenke, April 2012. - * Bruce Merry, February 2013. - * Tom Deakin and Simon McIntosh-Smith, July 2013 - * - * \version 1.2.6 - * \date August 2013 - * - * Optional extension support - * - * cl - * cl_ext_device_fission - * #define USE_CL_DEVICE_FISSION - */ - -/*! \mainpage - * \section intro Introduction - * For many large applications C++ is the language of choice and so it seems - * reasonable to define C++ bindings for OpenCL. - * - * - * The interface is contained with a single C++ header file \em cl.hpp and all - * definitions are contained within the namespace \em cl. There is no additional - * requirement to include \em cl.h and to use either the C++ or original C - * bindings it is enough to simply include \em cl.hpp. - * - * The bindings themselves are lightweight and correspond closely to the - * underlying C API. Using the C++ bindings introduces no additional execution - * overhead. - * - * For detail documentation on the bindings see: - * - * The OpenCL C++ Wrapper API 1.2 (revision 09) - * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.2.pdf - * - * \section example Example - * - * The following example shows a general use case for the C++ - * bindings, including support for the optional exception feature and - * also the supplied vector and string classes, see following sections for - * decriptions of these features. - * - * \code - * #define __CL_ENABLE_EXCEPTIONS - * - * #if defined(__APPLE__) || defined(__MACOSX) - * #include - * #else - * #include - * #endif - * #include - * #include - * #include - * - * const char * helloStr = "__kernel void " - * "hello(void) " - * "{ " - * " " - * "} "; - * - * int - * main(void) - * { - * cl_int err = CL_SUCCESS; - * try { - * - * std::vector platforms; - * cl::Platform::get(&platforms); - * if (platforms.size() == 0) { - * std::cout << "Platform size 0\n"; - * return -1; - * } - * - * cl_context_properties properties[] = - * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; - * cl::Context context(CL_DEVICE_TYPE_CPU, properties); - * - * std::vector devices = context.getInfo(); - * - * cl::Program::Sources source(1, - * std::make_pair(helloStr,strlen(helloStr))); - * cl::Program program_ = cl::Program(context, source); - * program_.build(devices); - * - * cl::Kernel kernel(program_, "hello", &err); - * - * cl::Event event; - * cl::CommandQueue queue(context, devices[0], 0, &err); - * queue.enqueueNDRangeKernel( - * kernel, - * cl::NullRange, - * cl::NDRange(4,4), - * cl::NullRange, - * NULL, - * &event); - * - * event.wait(); - * } - * catch (cl::Error err) { - * std::cerr - * << "ERROR: " - * << err.what() - * << "(" - * << err.err() - * << ")" - * << std::endl; - * } - * - * return EXIT_SUCCESS; - * } - * - * \endcode - * - */ -#ifndef CL_HPP_ -#define CL_HPP_ - -#ifdef _WIN32 - -#include -#include -#include -#include - -#if defined(__CL_ENABLE_EXCEPTIONS) -#include -#endif // #if defined(__CL_ENABLE_EXCEPTIONS) - -#pragma push_macro("max") -#undef max -#if defined(USE_DX_INTEROP) -#include -#include -#endif -#endif // _WIN32 - -// -#if defined(USE_CL_DEVICE_FISSION) -#include -#endif - -#if defined(__APPLE__) || defined(__MACOSX) -#include -#include -#include -#else -#include -#include -#endif // !__APPLE__ - -// To avoid accidentally taking ownership of core OpenCL types -// such as cl_kernel constructors are made explicit -// under OpenCL 1.2 -#if defined(CL_VERSION_1_2) && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define __CL_EXPLICIT_CONSTRUCTORS explicit -#else // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define __CL_EXPLICIT_CONSTRUCTORS -#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - -// Define deprecated prefixes and suffixes to ensure compilation -// in case they are not pre-defined -#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) - -#if !defined(CL_CALLBACK) -#define CL_CALLBACK -#endif //CL_CALLBACK - -#include -#include - -#if !defined(__NO_STD_VECTOR) -#include -#endif - -#if !defined(__NO_STD_STRING) -#include -#endif - -#if defined(linux) || defined(__APPLE__) || defined(__MACOSX) -#include - -#include -#include -#endif // linux - -#include - - -/*! \namespace cl - * - * \brief The OpenCL C++ bindings are defined within this namespace. - * - */ -namespace cl { - -class Memory; - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -#define __INIT_CL_EXT_FCN_PTR(name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddress(#name); \ - if(!pfn_##name) { \ - } \ - } -#endif // #if defined(CL_VERSION_1_1) - -#if defined(CL_VERSION_1_2) -#define __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddressForPlatform(platform, #name); \ - if(!pfn_##name) { \ - } \ - } -#endif // #if defined(CL_VERSION_1_1) - -class Program; -class Device; -class Context; -class CommandQueue; -class Memory; -class Buffer; - -#if defined(__CL_ENABLE_EXCEPTIONS) -/*! \brief Exception class - * - * This may be thrown by API functions when __CL_ENABLE_EXCEPTIONS is defined. - */ -class Error : public std::exception -{ -private: - cl_int err_; - const char * errStr_; -public: - /*! \brief Create a new CL error exception for a given error code - * and corresponding message. - * - * \param err error code value. - * - * \param errStr a descriptive string that must remain in scope until - * handling of the exception has concluded. If set, it - * will be returned by what(). - */ - Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) - {} - - ~Error() throw() {} - - /*! \brief Get error string associated with exception - * - * \return A memory pointer to the error message string. - */ - virtual const char * what() const throw () - { - if (errStr_ == NULL) { - return "empty"; - } - else { - return errStr_; - } - } - - /*! \brief Get error code associated with exception - * - * \return The error code. - */ - cl_int err(void) const { return err_; } -}; - -#define __ERR_STR(x) #x -#else -#define __ERR_STR(x) NULL -#endif // __CL_ENABLE_EXCEPTIONS - - -namespace detail -{ -#if defined(__CL_ENABLE_EXCEPTIONS) -static inline cl_int errHandler ( - cl_int err, - const char * errStr = NULL) -{ - if (err != CL_SUCCESS) { - throw Error(err, errStr); - } - return err; -} -#else -static inline cl_int errHandler (cl_int err, const char * errStr = NULL) -{ - (void) errStr; // suppress unused variable warning - return err; -} -#endif // __CL_ENABLE_EXCEPTIONS -} - - - -//! \cond DOXYGEN_DETAIL -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR __ERR_STR(clGetDeviceInfo) -#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) -#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) -#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) -#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) -#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) -#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) -#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) -#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) -#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) -#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) -#if defined(CL_VERSION_1_2) -#define __GET_KERNEL_ARG_INFO_ERR __ERR_STR(clGetKernelArgInfo) -#endif // #if defined(CL_VERSION_1_2) -#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) -#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) -#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) -#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) - -#define __CREATE_CONTEXT_ERR __ERR_STR(clCreateContext) -#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) -#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) - -#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) -#define __COPY_ERR __ERR_STR(cl::copy) -#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) -#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) -#if defined(CL_VERSION_1_2) -#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage) -#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture) -#define __IMAGE_DIMENSION_ERR __ERR_STR(Incorrect image dimensions) -#endif // #if defined(CL_VERSION_1_2) -#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) -#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) - -#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) -#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) -#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) -#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) - -#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) -#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) -#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) -#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) -#if defined(CL_VERSION_1_2) -#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR __ERR_STR(clCreateProgramWithBuiltInKernels) -#endif // #if defined(CL_VERSION_1_2) -#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) -#if defined(CL_VERSION_1_2) -#define __COMPILE_PROGRAM_ERR __ERR_STR(clCompileProgram) - -#endif // #if defined(CL_VERSION_1_2) -#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) - -#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) -#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) -#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) -#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) -#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) -#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) -#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) -#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) -#define __ENQUEUE_FILL_BUFFER_ERR __ERR_STR(clEnqueueFillBuffer) -#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) -#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) -#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) -#define __ENQUEUE_FILL_IMAGE_ERR __ERR_STR(clEnqueueFillImage) -#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) -#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) -#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) -#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) -#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) -#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) -#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) -#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) -#if defined(CL_VERSION_1_2) -#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR __ERR_STR(clEnqueueMigrateMemObjects) -#endif // #if defined(CL_VERSION_1_2) - -#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) -#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) - - -#define __RETAIN_ERR __ERR_STR(Retain Object) -#define __RELEASE_ERR __ERR_STR(Release Object) -#define __FLUSH_ERR __ERR_STR(clFlush) -#define __FINISH_ERR __ERR_STR(clFinish) -#define __VECTOR_CAPACITY_ERR __ERR_STR(Vector capacity error) - -/** - * CL 1.2 version that uses device fission. - */ -#if defined(CL_VERSION_1_2) -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevices) -#else -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) -#endif // #if defined(CL_VERSION_1_2) - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) -#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) -#define __CREATE_GL_TEXTURE_2D_ERR __ERR_STR(clCreateFromGLTexture2D) -#define __CREATE_GL_TEXTURE_3D_ERR __ERR_STR(clCreateFromGLTexture3D) -#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) -#endif // #if defined(CL_VERSION_1_1) - -#endif // __CL_USER_OVERRIDE_ERROR_STRINGS -//! \endcond - -/** - * CL 1.2 marker and barrier commands - */ -#if defined(CL_VERSION_1_2) -#define __ENQUEUE_MARKER_WAIT_LIST_ERR __ERR_STR(clEnqueueMarkerWithWaitList) -#define __ENQUEUE_BARRIER_WAIT_LIST_ERR __ERR_STR(clEnqueueBarrierWithWaitList) -#endif // #if defined(CL_VERSION_1_2) - -#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) -typedef std::string STRING_CLASS; -#elif !defined(__USE_DEV_STRING) - -/*! \class string - * \brief Simple string class, that provides a limited subset of std::string - * functionality but avoids many of the issues that come with that class. - - * \note Deprecated. Please use std::string as default or - * re-define the string class to match the std::string - * interface by defining STRING_CLASS - */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED string CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED -{ -private: - ::size_t size_; - char * str_; -public: - //! \brief Constructs an empty string, allocating no memory. - string(void) : size_(0), str_(NULL) - { - } - - /*! \brief Constructs a string populated from an arbitrary value of - * specified size. - * - * An extra '\0' is added, in case none was contained in str. - * - * \param str the initial value of the string instance. Note that '\0' - * characters receive no special treatment. If NULL, - * the string is left empty, with a size of 0. - * - * \param size the number of characters to copy from str. - */ - string(const char * str, ::size_t size) : - size_(size), - str_(NULL) - { - if( size > 0 ) { - str_ = new char[size_+1]; - if (str_ != NULL) { - memcpy(str_, str, size_ * sizeof(char)); - str_[size_] = '\0'; - } - else { - size_ = 0; - } - } - } - - /*! \brief Constructs a string populated from a null-terminated value. - * - * \param str the null-terminated initial value of the string instance. - * If NULL, the string is left empty, with a size of 0. - */ - string(const char * str) : - size_(0), - str_(NULL) - { - if( str ) { - size_= ::strlen(str); - } - if( size_ > 0 ) { - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, str, (size_ + 1) * sizeof(char)); - } - } - } - - void resize( ::size_t n ) - { - if( size_ == n ) { - return; - } - if (n == 0) { - if( str_ ) { - delete [] str_; - } - str_ = NULL; - size_ = 0; - } - else { - char *newString = new char[n + 1]; - int copySize = n; - if( size_ < n ) { - copySize = size_; - } - size_ = n; - - if(str_) { - memcpy(newString, str_, (copySize + 1) * sizeof(char)); - } - if( copySize < size_ ) { - memset(newString + copySize, 0, size_ - copySize); - } - newString[size_] = '\0'; - - delete [] str_; - str_ = newString; - } - } - - const char& operator[] ( ::size_t pos ) const - { - return str_[pos]; - } - - char& operator[] ( ::size_t pos ) - { - return str_[pos]; - } - - /*! \brief Copies the value of another string to this one. - * - * \param rhs the string to copy. - * - * \returns a reference to the modified instance. - */ - string& operator=(const string& rhs) - { - if (this == &rhs) { - return *this; - } - - if( str_ != NULL ) { - delete [] str_; - str_ = NULL; - size_ = 0; - } - - if (rhs.size_ == 0 || rhs.str_ == NULL) { - str_ = NULL; - size_ = 0; - } - else { - str_ = new char[rhs.size_ + 1]; - size_ = rhs.size_; - - if (str_ != NULL) { - memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - return *this; - } - - /*! \brief Constructs a string by copying the value of another instance. - * - * \param rhs the string to copy. - */ - string(const string& rhs) : - size_(0), - str_(NULL) - { - *this = rhs; - } - - //! \brief Destructor - frees memory used to hold the current value. - ~string() - { - delete[] str_; - str_ = NULL; - } - - //! \brief Queries the length of the string, excluding any added '\0's. - ::size_t size(void) const { return size_; } - - //! \brief Queries the length of the string, excluding any added '\0's. - ::size_t length(void) const { return size(); } - - /*! \brief Returns a pointer to the private copy held by this instance, - * or "" if empty/unset. - */ - const char * c_str(void) const { return (str_) ? str_ : "";} -}; -typedef cl::string STRING_CLASS; -#endif // #elif !defined(__USE_DEV_STRING) - -#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) -#define VECTOR_CLASS std::vector -#elif !defined(__USE_DEV_VECTOR) -#define VECTOR_CLASS cl::vector - -#if !defined(__MAX_DEFAULT_VECTOR_SIZE) -#define __MAX_DEFAULT_VECTOR_SIZE 10 -#endif - -/*! \class vector - * \brief Fixed sized vector implementation that mirroring - * - * \note Deprecated. Please use std::vector as default or - * re-define the vector class to match the std::vector - * interface by defining VECTOR_CLASS - - * \note Not recommended for use with custom objects as - * current implementation will construct N elements - * - * std::vector functionality. - * \brief Fixed sized vector compatible with std::vector. - * - * \note - * This differs from std::vector<> not just in memory allocation, - * but also in terms of when members are constructed, destroyed, - * and assigned instead of being copy constructed. - * - * \param T type of element contained in the vector. - * - * \param N maximum size of the vector. - */ -template -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED vector CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED -{ -private: - T data_[N]; - unsigned int size_; - -public: - //! \brief Constructs an empty vector with no memory allocated. - vector() : - size_(static_cast(0)) - {} - - //! \brief Deallocates the vector's memory and destroys all of its elements. - ~vector() - { - clear(); - } - - //! \brief Returns the number of elements currently contained. - unsigned int size(void) const - { - return size_; - } - - /*! \brief Empties the vector of all elements. - * \note - * This does not deallocate memory but will invoke destructors - * on contained elements. - */ - void clear() - { - while(!empty()) { - pop_back(); - } - } - - /*! \brief Appends an element after the last valid element. - * Calling this on a vector that has reached capacity will throw an - * exception if exceptions are enabled. - */ - void push_back (const T& x) - { - if (size() < N) { - new (&data_[size_]) T(x); - size_++; - } else { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - } - - /*! \brief Removes the last valid element from the vector. - * Calling this on an empty vector will throw an exception - * if exceptions are enabled. - */ - void pop_back(void) - { - if (size_ != 0) { - --size_; - data_[size_].~T(); - } else { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - } - - /*! \brief Constructs with a value copied from another. - * - * \param vec the vector to copy. - */ - vector(const vector& vec) : - size_(vec.size_) - { - if (size_ != 0) { - assign(vec.begin(), vec.end()); - } - } - - /*! \brief Constructs with a specified number of initial elements. - * - * \param size number of initial elements. - * - * \param val value of initial elements. - */ - vector(unsigned int size, const T& val = T()) : - size_(0) - { - for (unsigned int i = 0; i < size; i++) { - push_back(val); - } - } - - /*! \brief Overwrites the current content with that copied from another - * instance. - * - * \param rhs vector to copy. - * - * \returns a reference to this. - */ - vector& operator=(const vector& rhs) - { - if (this == &rhs) { - return *this; - } - - if (rhs.size_ != 0) { - assign(rhs.begin(), rhs.end()); - } else { - clear(); - } - - return *this; - } - - /*! \brief Tests equality against another instance. - * - * \param vec the vector against which to compare. - */ - bool operator==(vector &vec) - { - if (size() != vec.size()) { - return false; - } - - for( unsigned int i = 0; i < size(); ++i ) { - if( operator[](i) != vec[i] ) { - return false; - } - } - return true; - } - - //! \brief Conversion operator to T*. - operator T* () { return data_; } - - //! \brief Conversion operator to const T*. - operator const T* () const { return data_; } - - //! \brief Tests whether this instance has any elements. - bool empty (void) const - { - return size_==0; - } - - //! \brief Returns the maximum number of elements this instance can hold. - unsigned int max_size (void) const - { - return N; - } - - //! \brief Returns the maximum number of elements this instance can hold. - unsigned int capacity () const - { - return N; - } - - /*! \brief Returns a reference to a given element. - * - * \param index which element to access. * - * \note - * The caller is responsible for ensuring index is >= 0 and < size(). - */ - T& operator[](int index) - { - return data_[index]; - } - - /*! \brief Returns a const reference to a given element. - * - * \param index which element to access. - * - * \note - * The caller is responsible for ensuring index is >= 0 and < size(). - */ - const T& operator[](int index) const - { - return data_[index]; - } - - /*! \brief Assigns elements of the vector based on a source iterator range. - * - * \param start Beginning iterator of source range - * \param end Enditerator of source range - * - * \note - * Will throw an exception if exceptions are enabled and size exceeded. - */ - template - void assign(I start, I end) - { - clear(); - while(start != end) { - push_back(*start); - start++; - } - } - - /*! \class iterator - * \brief Const iterator class for vectors - */ - class iterator - { - private: - const vector *vec_; - int index_; - - /** - * Internal iterator constructor to capture reference - * to the vector it iterates over rather than taking - * the vector by copy. - */ - iterator (const vector &vec, int index) : - vec_(&vec) - { - if( !vec.empty() ) { - index_ = index; - } else { - index_ = -1; - } - } - - public: - iterator(void) : - index_(-1), - vec_(NULL) - { - } - - iterator(const iterator& rhs) : - vec_(rhs.vec_), - index_(rhs.index_) - { - } - - ~iterator(void) {} - - static iterator begin(const cl::vector &vec) - { - iterator i(vec, 0); - - return i; - } - - static iterator end(const cl::vector &vec) - { - iterator i(vec, vec.size()); - - return i; - } - - bool operator==(iterator i) - { - return ((vec_ == i.vec_) && - (index_ == i.index_)); - } - - bool operator!=(iterator i) - { - return (!(*this==i)); - } - - iterator& operator++() - { - ++index_; - return *this; - } - - iterator operator++(int) - { - iterator retVal(*this); - ++index_; - return retVal; - } - - iterator& operator--() - { - --index_; - return *this; - } - - iterator operator--(int) - { - iterator retVal(*this); - --index_; - return retVal; - } - - const T& operator *() const - { - return (*vec_)[index_]; - } - }; - - iterator begin(void) - { - return iterator::begin(*this); - } - - iterator begin(void) const - { - return iterator::begin(*this); - } - - iterator end(void) - { - return iterator::end(*this); - } - - iterator end(void) const - { - return iterator::end(*this); - } - - T& front(void) - { - return data_[0]; - } - - T& back(void) - { - return data_[size_]; - } - - const T& front(void) const - { - return data_[0]; - } - - const T& back(void) const - { - return data_[size_-1]; - } -}; -#endif // #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) - - - - - -namespace detail { -#define __DEFAULT_NOT_INITIALIZED 1 -#define __DEFAULT_BEING_INITIALIZED 2 -#define __DEFAULT_INITIALIZED 4 - - /* - * Compare and exchange primitives are needed for handling of defaults - */ - inline int compare_exchange(volatile int * dest, int exchange, int comparand) - { -#ifdef _WIN32 - return (int)(InterlockedCompareExchange( - (volatile long*)dest, - (long)exchange, - (long)comparand)); -#elif defined(__APPLE__) || defined(__MACOSX) - return OSAtomicOr32Orig((uint32_t)exchange, (volatile uint32_t*)dest); -#else // !_WIN32 || defined(__APPLE__) || defined(__MACOSX) - return (__sync_val_compare_and_swap( - dest, - comparand, - exchange)); -#endif // !_WIN32 - } - - inline void fence() { _mm_mfence(); } -}; // namespace detail - - -/*! \brief class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, whose - * size is known statically. - */ -template -class size_t -{ -private: - ::size_t data_[N]; - -public: - //! \brief Initialize size_t to all 0s - size_t() - { - for( int i = 0; i < N; ++i ) { - data_[i] = 0; - } - } - - ::size_t& operator[](int index) - { - return data_[index]; - } - - const ::size_t& operator[](int index) const - { - return data_[index]; - } - - //! \brief Conversion operator to T*. - operator ::size_t* () { return data_; } - - //! \brief Conversion operator to const T*. - operator const ::size_t* () const { return data_; } -}; - -namespace detail { - -// Generic getInfoHelper. The final parameter is used to guide overload -// resolution: the actual parameter passed is an int, which makes this -// a worse conversion sequence than a specialization that declares the -// parameter as an int. -template -inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long) -{ - return f(name, sizeof(T), param, NULL); -} - -// Specialized getInfoHelper for VECTOR_CLASS params -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - T* value = (T*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - param->assign(&value[0], &value[required/sizeof(T)]); - return CL_SUCCESS; -} - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. - */ -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int, typename T::cl_type = 0) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - typename T::cl_type * value = (typename T::cl_type *) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - ::size_t elements = required / sizeof(typename T::cl_type); - param->assign(&value[0], &value[elements]); - for (::size_t i = 0; i < elements; i++) - { - if (value[i] != NULL) - { - err = (*param)[i].retain(); - if (err != CL_SUCCESS) { - return err; - } - } - } - return CL_SUCCESS; -} - -// Specialized for getInfo -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int) -{ - cl_int err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); - - if (err != CL_SUCCESS) { - return err; - } - - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for STRING_CLASS params -template -inline cl_int getInfoHelper(Func f, cl_uint name, STRING_CLASS* param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - char* value = (char*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - *param = value; - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for cl::size_t params -template -inline cl_int getInfoHelper(Func f, cl_uint name, size_t* param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - ::size_t* value = (::size_t*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - for(int i = 0; i < N; ++i) { - (*param)[i] = value[i]; - } - - return CL_SUCCESS; -} - -template struct ReferenceHandler; - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. - */ -template -inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0) -{ - typename T::cl_type value; - cl_int err = f(name, sizeof(value), &value, NULL); - if (err != CL_SUCCESS) { - return err; - } - *param = value; - if (value != NULL) - { - err = param->retain(); - if (err != CL_SUCCESS) { - return err; - } - } - return CL_SUCCESS; -} - -#define __PARAM_NAME_INFO_1_0(F) \ - F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ - F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ - F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ - F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ - F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ - F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ - F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ - F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ - F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ - F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ - \ - F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ - F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ - F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ - F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \ - \ - F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ - \ - F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ - F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ - F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ - F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ - F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ - \ - F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ - F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ - F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ - F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ - F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ - \ - F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ - F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ - \ - F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ - F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ - F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ - F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ - F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ - \ - F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ - F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ - F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ - F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ - \ - F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ - F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ - F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) - -#if defined(CL_VERSION_1_1) -#define __PARAM_NAME_INFO_1_1(F) \ - F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ - F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \ - \ - F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ - F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ - \ - F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) -#endif // CL_VERSION_1_1 - - -#if defined(CL_VERSION_1_2) -#define __PARAM_NAME_INFO_1_2(F) \ - F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) \ - \ - F(cl_program_info, CL_PROGRAM_NUM_KERNELS, ::size_t) \ - F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, STRING_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ - \ - F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, STRING_CLASS) \ - \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, STRING_CLASS) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPE, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, ::size_t) \ - F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ - F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, STRING_CLASS) -#endif // #if defined(CL_VERSION_1_2) - -#if defined(USE_CL_DEVICE_FISSION) -#define __PARAM_NAME_DEVICE_FISSION(F) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) -#endif // USE_CL_DEVICE_FISSION - -template -struct param_traits {}; - -#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \ -struct token; \ -template<> \ -struct param_traits \ -{ \ - enum { value = param_name }; \ - typedef T param_type; \ -}; - -__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS) -#if defined(CL_VERSION_1_1) -__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS) -#endif // CL_VERSION_1_1 -#if defined(CL_VERSION_1_2) -__PARAM_NAME_INFO_1_2(__CL_DECLARE_PARAM_TRAITS) -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS); -#endif // USE_CL_DEVICE_FISSION - -#ifdef CL_PLATFORM_ICD_SUFFIX_KHR -__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS) -#endif - -#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) -#endif - -#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, VECTOR_CLASS< ::size_t>) -#endif -#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_SIMD_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) -#endif - -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) -#endif -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) -#endif -#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) -#endif -#ifdef CL_DEVICE_WARP_SIZE_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) -#endif -#ifdef CL_DEVICE_GPU_OVERLAP_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) -#endif -#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) -#endif -#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) -#endif - -// Convenience functions - -template -inline cl_int -getInfo(Func f, cl_uint name, T* param) -{ - return getInfoHelper(f, name, param, 0); -} - -template -struct GetInfoFunctor0 -{ - Func f_; const Arg0& arg0_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, param, size, value, size_ret); } -}; - -template -struct GetInfoFunctor1 -{ - Func f_; const Arg0& arg0_; const Arg1& arg1_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, arg1_, param, size, value, size_ret); } -}; - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) -{ - GetInfoFunctor0 f0 = { f, arg0 }; - return getInfoHelper(f0, name, param, 0); -} - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) -{ - GetInfoFunctor1 f0 = { f, arg0, arg1 }; - return getInfoHelper(f0, name, param, 0); -} - -template -struct ReferenceHandler -{ }; - -#if defined(CL_VERSION_1_2) -/** - * OpenCL 1.2 devices do have retain/release. - */ -template <> -struct ReferenceHandler -{ - /** - * Retain the device. - * \param device A valid device created using createSubDevices - * \return - * CL_SUCCESS if the function executed successfully. - * CL_INVALID_DEVICE if device was not a valid subdevice - * CL_OUT_OF_RESOURCES - * CL_OUT_OF_HOST_MEMORY - */ - static cl_int retain(cl_device_id device) - { return ::clRetainDevice(device); } - /** - * Retain the device. - * \param device A valid device created using createSubDevices - * \return - * CL_SUCCESS if the function executed successfully. - * CL_INVALID_DEVICE if device was not a valid subdevice - * CL_OUT_OF_RESOURCES - * CL_OUT_OF_HOST_MEMORY - */ - static cl_int release(cl_device_id device) - { return ::clReleaseDevice(device); } -}; -#else // #if defined(CL_VERSION_1_2) -/** - * OpenCL 1.1 devices do not have retain/release. - */ -template <> -struct ReferenceHandler -{ - // cl_device_id does not have retain(). - static cl_int retain(cl_device_id) - { return CL_SUCCESS; } - // cl_device_id does not have release(). - static cl_int release(cl_device_id) - { return CL_SUCCESS; } -}; -#endif // #if defined(CL_VERSION_1_2) - -template <> -struct ReferenceHandler -{ - // cl_platform_id does not have retain(). - static cl_int retain(cl_platform_id) - { return CL_SUCCESS; } - // cl_platform_id does not have release(). - static cl_int release(cl_platform_id) - { return CL_SUCCESS; } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_context context) - { return ::clRetainContext(context); } - static cl_int release(cl_context context) - { return ::clReleaseContext(context); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_command_queue queue) - { return ::clRetainCommandQueue(queue); } - static cl_int release(cl_command_queue queue) - { return ::clReleaseCommandQueue(queue); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_mem memory) - { return ::clRetainMemObject(memory); } - static cl_int release(cl_mem memory) - { return ::clReleaseMemObject(memory); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_sampler sampler) - { return ::clRetainSampler(sampler); } - static cl_int release(cl_sampler sampler) - { return ::clReleaseSampler(sampler); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_program program) - { return ::clRetainProgram(program); } - static cl_int release(cl_program program) - { return ::clReleaseProgram(program); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_kernel kernel) - { return ::clRetainKernel(kernel); } - static cl_int release(cl_kernel kernel) - { return ::clReleaseKernel(kernel); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_event event) - { return ::clRetainEvent(event); } - static cl_int release(cl_event event) - { return ::clReleaseEvent(event); } -}; - - -// Extracts version number with major in the upper 16 bits, minor in the lower 16 -static cl_uint getVersion(const char *versionInfo) -{ - int highVersion = 0; - int lowVersion = 0; - int index = 7; - while(versionInfo[index] != '.' ) { - highVersion *= 10; - highVersion += versionInfo[index]-'0'; - ++index; - } - ++index; - while(versionInfo[index] != ' ' ) { - lowVersion *= 10; - lowVersion += versionInfo[index]-'0'; - ++index; - } - return (highVersion << 16) | lowVersion; -} - -static cl_uint getPlatformVersion(cl_platform_id platform) -{ - ::size_t size = 0; - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size); - char *versionInfo = (char *) alloca(size); - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, &versionInfo[0], &size); - return getVersion(versionInfo); -} - -static cl_uint getDevicePlatformVersion(cl_device_id device) -{ - cl_platform_id platform; - clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); - return getPlatformVersion(platform); -} - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -static cl_uint getContextPlatformVersion(cl_context context) -{ - // The platform cannot be queried directly, so we first have to grab a - // device and obtain its context - ::size_t size = 0; - clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); - if (size == 0) - return 0; - cl_device_id *devices = (cl_device_id *) alloca(size); - clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices, NULL); - return getDevicePlatformVersion(devices[0]); -} -#endif // #if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - -template -class Wrapper -{ -public: - typedef T cl_type; - -protected: - cl_type object_; - -public: - Wrapper() : object_(NULL) { } - - Wrapper(const cl_type &obj) : object_(obj) { } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - } - - Wrapper& operator = (const Wrapper& rhs) - { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs.object_; - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - return *this; - } - - Wrapper& operator = (const cl_type &rhs) - { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs; - return *this; - } - - cl_type operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - -protected: - template - friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); - - cl_int retain() const - { - return ReferenceHandler::retain(object_); - } - - cl_int release() const - { - return ReferenceHandler::release(object_); - } -}; - -template <> -class Wrapper -{ -public: - typedef cl_device_id cl_type; - -protected: - cl_type object_; - bool referenceCountable_; - - static bool isReferenceCountable(cl_device_id device) - { - bool retVal = false; - if (device != NULL) { - int version = getDevicePlatformVersion(device); - if(version > ((1 << 16) + 1)) { - retVal = true; - } - } - return retVal; - } - -public: - Wrapper() : object_(NULL), referenceCountable_(false) - { - } - - Wrapper(const cl_type &obj) : object_(obj), referenceCountable_(false) - { - referenceCountable_ = isReferenceCountable(obj); - } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - referenceCountable_ = isReferenceCountable(object_); - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - } - - Wrapper& operator = (const Wrapper& rhs) - { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - return *this; - } - - Wrapper& operator = (const cl_type &rhs) - { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs; - referenceCountable_ = isReferenceCountable(object_); - return *this; - } - - cl_type operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - -protected: - template - friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); - - template - friend inline cl_int getInfoHelper(Func, cl_uint, VECTOR_CLASS*, int, typename U::cl_type); - - cl_int retain() const - { - if( referenceCountable_ ) { - return ReferenceHandler::retain(object_); - } - else { - return CL_SUCCESS; - } - } - - cl_int release() const - { - if( referenceCountable_ ) { - return ReferenceHandler::release(object_); - } - else { - return CL_SUCCESS; - } - } -}; - -} // namespace detail -//! \endcond - -/*! \stuct ImageFormat - * \brief Adds constructors and member functions for cl_image_format. - * - * \see cl_image_format - */ -struct ImageFormat : public cl_image_format -{ - //! \brief Default constructor - performs no initialization. - ImageFormat(){} - - //! \brief Initializing constructor. - ImageFormat(cl_channel_order order, cl_channel_type type) - { - image_channel_order = order; - image_channel_data_type = type; - } - - //! \brief Assignment operator. - ImageFormat& operator = (const ImageFormat& rhs) - { - if (this != &rhs) { - this->image_channel_data_type = rhs.image_channel_data_type; - this->image_channel_order = rhs.image_channel_order; - } - return *this; - } -}; - -/*! \brief Class interface for cl_device_id. - * - * \note Copies of these objects are inexpensive, since they don't 'own' - * any underlying resources or data structures. - * - * \see cl_device_id - */ -class Device : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Device() : detail::Wrapper() { } - - /*! \brief Copy constructor. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - Device(const Device& device) : detail::Wrapper(device) { } - - /*! \brief Constructor from cl_device_id. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - Device(const cl_device_id &device) : detail::Wrapper(device) { } - - /*! \brief Returns the first device on the default context. - * - * \see Context::getDefault() - */ - static Device getDefault(cl_int * err = NULL); - - /*! \brief Assignment operator from Device. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - Device& operator = (const Device& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment operator from cl_device_id. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - Device& operator = (const cl_device_id& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetDeviceInfo(). - template - cl_int getInfo(cl_device_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetDeviceInfo, object_, name, param), - __GET_DEVICE_INFO_ERR); - } - - //! \brief Wrapper for clGetDeviceInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_device_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /** - * CL 1.2 version - */ -#if defined(CL_VERSION_1_2) - //! \brief Wrapper for clCreateSubDevicesEXT(). - cl_int createSubDevices( - const cl_device_partition_property * properties, - VECTOR_CLASS* devices) - { - cl_uint n = 0; - cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = clCreateSubDevices(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif // #if defined(CL_VERSION_1_2) - -/** - * CL 1.1 version that uses device fission. - */ -#if defined(CL_VERSION_1_1) -#if defined(USE_CL_DEVICE_FISSION) - cl_int createSubDevices( - const cl_device_partition_property_ext * properties, - VECTOR_CLASS* devices) - { - typedef CL_API_ENTRY cl_int - ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( - cl_device_id /*in_device*/, - const cl_device_partition_property_ext * /* properties */, - cl_uint /*num_entries*/, - cl_device_id * /*out_devices*/, - cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; - - static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); - - cl_uint n = 0; - cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif // #if defined(USE_CL_DEVICE_FISSION) -#endif // #if defined(CL_VERSION_1_1) -}; - -/*! \brief Class interface for cl_platform_id. - * - * \note Copies of these objects are inexpensive, since they don't 'own' - * any underlying resources or data structures. - * - * \see cl_platform_id - */ -class Platform : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Platform() : detail::Wrapper() { } - - /*! \brief Copy constructor. - * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - Platform(const Platform& platform) : detail::Wrapper(platform) { } - - /*! \brief Constructor from cl_platform_id. - * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - Platform(const cl_platform_id &platform) : detail::Wrapper(platform) { } - - /*! \brief Assignment operator from Platform. - * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - Platform& operator = (const Platform& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment operator from cl_platform_id. - * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - Platform& operator = (const cl_platform_id& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetPlatformInfo(). - cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPlatformInfo, object_, name, param), - __GET_PLATFORM_INFO_ERR); - } - - //! \brief Wrapper for clGetPlatformInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_platform_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! \brief Gets a list of devices for this platform. - * - * Wraps clGetDeviceIDs(). - */ - cl_int getDevices( - cl_device_type type, - VECTOR_CLASS* devices) const - { - cl_uint n = 0; - if( devices == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); - } - cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = ::clGetDeviceIDs(object_, type, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - -#if defined(USE_DX_INTEROP) - /*! \brief Get the list of available D3D10 devices. - * - * \param d3d_device_source. - * - * \param d3d_object. - * - * \param d3d_device_set. - * - * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device - * values returned in devices can be used to identify a specific OpenCL - * device. If \a devices argument is NULL, this argument is ignored. - * - * \return One of the following values: - * - CL_SUCCESS if the function is executed successfully. - * - * The application can query specific capabilities of the OpenCL device(s) - * returned by cl::getDevices. This can be used by the application to - * determine which device(s) to use. - * - * \note In the case that exceptions are enabled and a return value - * other than CL_SUCCESS is generated, then cl::Error exception is - * generated. - */ - cl_int getDevices( - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - VECTOR_CLASS* devices) const - { - typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint* num_devices); - - if( devices == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); - } - - static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; - __INIT_CL_EXT_FCN_PTR_PLATFORM(object_, clGetDeviceIDsFromD3D10KHR); - - cl_uint n = 0; - cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - 0, - NULL, - &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - n, - ids, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif - - /*! \brief Gets a list of available platforms. - * - * Wraps clGetPlatformIDs(). - */ - static cl_int get( - VECTOR_CLASS* platforms) - { - cl_uint n = 0; - - if( platforms == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); - } - - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - platforms->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - - /*! \brief Gets the first available platform. - * - * Wraps clGetPlatformIDs(), returning the first result. - */ - static cl_int get( - Platform * platform) - { - cl_uint n = 0; - - if( platform == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); - } - - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - *platform = ids[0]; - return CL_SUCCESS; - } - - /*! \brief Gets the first available platform, returning it by value. - * - * Wraps clGetPlatformIDs(), returning the first result. - */ - static Platform get( - cl_int * errResult = NULL) - { - Platform platform; - cl_uint n = 0; - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - if (errResult != NULL) { - *errResult = err; - } - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - - if (err != CL_SUCCESS) { - detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - if (errResult != NULL) { - *errResult = err; - } - - return ids[0]; - } - - static Platform getDefault( - cl_int *errResult = NULL ) - { - return get(errResult); - } - - -#if defined(CL_VERSION_1_2) - //! \brief Wrapper for clUnloadCompiler(). - cl_int - unloadCompiler() - { - return ::clUnloadPlatformCompiler(object_); - } -#endif // #if defined(CL_VERSION_1_2) -}; // class Platform - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -/** - * Unload the OpenCL compiler. - * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. - */ -inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int -UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -inline cl_int -UnloadCompiler() -{ - return ::clUnloadCompiler(); -} -#endif // #if defined(CL_VERSION_1_1) - -/*! \brief Class interface for cl_context. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_context as the original. For details, see - * clRetainContext() and clReleaseContext(). - * - * \see cl_context - */ -class Context - : public detail::Wrapper -{ -private: - static volatile int default_initialized_; - static Context default_; - static volatile cl_int default_error_; -public: - /*! \brief Destructor. - * - * This calls clReleaseContext() on the value held by this instance. - */ - ~Context() { } - - /*! \brief Constructs a context including a list of specified devices. - * - * Wraps clCreateContext(). - */ - Context( - const VECTOR_CLASS& devices, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - object_ = ::clCreateContext( - properties, (cl_uint) numDevices, - deviceIDs, - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (err != NULL) { - *err = error; - } - } - - Context( - const Device& device, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - - cl_device_id deviceID = device(); - - object_ = ::clCreateContext( - properties, 1, - &deviceID, - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Constructs a context including all or a subset of devices of a specified type. - * - * Wraps clCreateContextFromType(). - */ - Context( - cl_device_type type, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - -#if !defined(__APPLE__) || !defined(__MACOS) - cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 }; - - if (properties == NULL) { - // Get a valid platform ID as we cannot send in a blank one - VECTOR_CLASS platforms; - error = Platform::get(&platforms); - if (error != CL_SUCCESS) { - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - return; - } - - // Check the platforms we found for a device of our specified type - cl_context_properties platform_id = 0; - for (unsigned int i = 0; i < platforms.size(); i++) { - - VECTOR_CLASS devices; - -#if defined(__CL_ENABLE_EXCEPTIONS) - try { -#endif - - error = platforms[i].getDevices(type, &devices); - -#if defined(__CL_ENABLE_EXCEPTIONS) - } catch (Error) {} - // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type - // We do error checking next anyway, and can throw there if needed -#endif - - // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND - if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) { - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - if (devices.size() > 0) { - platform_id = (cl_context_properties)platforms[i](); - break; - } - } - - if (platform_id == 0) { - detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = CL_DEVICE_NOT_FOUND; - } - return; - } - - prop[1] = platform_id; - properties = &prop[0]; - } -#endif - object_ = ::clCreateContextFromType( - properties, type, notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. - * - * \note All calls to this function return the same cl_context as the first. - */ - static Context getDefault(cl_int * err = NULL) - { - int state = detail::compare_exchange( - &default_initialized_, - __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); - - if (state & __DEFAULT_INITIALIZED) { - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - if (state & __DEFAULT_BEING_INITIALIZED) { - // Assume writes will propagate eventually... - while(default_initialized_ != __DEFAULT_INITIALIZED) { - detail::fence(); - } - - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - cl_int error; - default_ = Context( - CL_DEVICE_TYPE_DEFAULT, - NULL, - NULL, - NULL, - &error); - - detail::fence(); - - default_error_ = error; - // Assume writes will propagate eventually... - default_initialized_ = __DEFAULT_INITIALIZED; - - detail::fence(); - - if (err != NULL) { - *err = default_error_; - } - return default_; - - } - - //! \brief Default constructor - initializes to NULL. - Context() : detail::Wrapper() { } - - /*! \brief Copy constructor. - * - * This calls clRetainContext() on the parameter's cl_context. - */ - Context(const Context& context) : detail::Wrapper(context) { } - - /*! \brief Constructor from cl_context - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_context - * into the new Context object. - */ - __CL_EXPLICIT_CONSTRUCTORS Context(const cl_context& context) : detail::Wrapper(context) { } - - /*! \brief Assignment operator from Context. - * - * This calls clRetainContext() on the parameter and clReleaseContext() on - * the previous value held by this instance. - */ - Context& operator = (const Context& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment operator from cl_context - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseContext() on the value previously held by this instance. - */ - Context& operator = (const cl_context& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetContextInfo(). - template - cl_int getInfo(cl_context_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetContextInfo, object_, name, param), - __GET_CONTEXT_INFO_ERR); - } - - //! \brief Wrapper for clGetContextInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_context_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! \brief Gets a list of supported image formats. - * - * Wraps clGetSupportedImageFormats(). - */ - cl_int getSupportedImageFormats( - cl_mem_flags flags, - cl_mem_object_type type, - VECTOR_CLASS* formats) const - { - cl_uint numEntries; - cl_int err = ::clGetSupportedImageFormats( - object_, - flags, - type, - 0, - NULL, - &numEntries); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - ImageFormat* value = (ImageFormat*) - alloca(numEntries * sizeof(ImageFormat)); - err = ::clGetSupportedImageFormats( - object_, - flags, - type, - numEntries, - (cl_image_format*) value, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - formats->assign(&value[0], &value[numEntries]); - return CL_SUCCESS; - } -}; - -inline Device Device::getDefault(cl_int * err) -{ - cl_int error; - Device device; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; - } - } - else { - device = context.getInfo()[0]; - if (err != NULL) { - *err = CL_SUCCESS; - } - } - - return device; -} - - -#ifdef _WIN32 -__declspec(selectany) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -__declspec(selectany) Context Context::default_; -__declspec(selectany) volatile cl_int Context::default_error_ = CL_SUCCESS; -#else -__attribute__((weak)) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -__attribute__((weak)) Context Context::default_; -__attribute__((weak)) volatile cl_int Context::default_error_ = CL_SUCCESS; -#endif - -/*! \brief Class interface for cl_event. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_event as the original. For details, see - * clRetainEvent() and clReleaseEvent(). - * - * \see cl_event - */ -class Event : public detail::Wrapper -{ -public: - /*! \brief Destructor. - * - * This calls clReleaseEvent() on the value held by this instance. - */ - ~Event() { } - - //! \brief Default constructor - initializes to NULL. - Event() : detail::Wrapper() { } - - /*! \brief Copy constructor. - * - * This calls clRetainEvent() on the parameter's cl_event. - */ - Event(const Event& event) : detail::Wrapper(event) { } - - /*! \brief Constructor from cl_event - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_event - * into the new Event object. - */ - Event(const cl_event& event) : detail::Wrapper(event) { } - - /*! \brief Assignment operator from cl_event - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseEvent() on the value previously held by this instance. - */ - Event& operator = (const Event& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment operator from cl_event. - * - * This calls clRetainEvent() on the parameter and clReleaseEvent() on - * the previous value held by this instance. - */ - Event& operator = (const cl_event& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetEventInfo(). - template - cl_int getInfo(cl_event_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetEventInfo, object_, name, param), - __GET_EVENT_INFO_ERR); - } - - //! \brief Wrapper for clGetEventInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_event_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - //! \brief Wrapper for clGetEventProfilingInfo(). - template - cl_int getProfilingInfo(cl_profiling_info name, T* param) const - { - return detail::errHandler(detail::getInfo( - &::clGetEventProfilingInfo, object_, name, param), - __GET_EVENT_PROFILE_INFO_ERR); - } - - //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. - template typename - detail::param_traits::param_type - getProfilingInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_profiling_info, name>::param_type param; - cl_int result = getProfilingInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! \brief Blocks the calling thread until this event completes. - * - * Wraps clWaitForEvents(). - */ - cl_int wait() const - { - return detail::errHandler( - ::clWaitForEvents(1, &object_), - __WAIT_FOR_EVENTS_ERR); - } - -#if defined(CL_VERSION_1_1) - /*! \brief Registers a user callback function for a specific command execution status. - * - * Wraps clSetEventCallback(). - */ - cl_int setCallback( - cl_int type, - void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetEventCallback( - object_, - type, - pfn_notify, - user_data), - __SET_EVENT_CALLBACK_ERR); - } -#endif - - /*! \brief Blocks the calling thread until every event specified is complete. - * - * Wraps clWaitForEvents(). - */ - static cl_int - waitForEvents(const VECTOR_CLASS& events) - { - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); - } -}; - -#if defined(CL_VERSION_1_1) -/*! \brief Class interface for user events (a subset of cl_event's). - * - * See Event for details about copy semantics, etc. - */ -class UserEvent : public Event -{ -public: - /*! \brief Constructs a user event on a given context. - * - * Wraps clCreateUserEvent(). - */ - UserEvent( - const Context& context, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateUserEvent( - context(), - &error); - - detail::errHandler(error, __CREATE_USER_EVENT_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - UserEvent() : Event() { } - - //! \brief Copy constructor - performs shallow copy. - UserEvent(const UserEvent& event) : Event(event) { } - - //! \brief Assignment Operator - performs shallow copy. - UserEvent& operator = (const UserEvent& rhs) - { - if (this != &rhs) { - Event::operator=(rhs); - } - return *this; - } - - /*! \brief Sets the execution status of a user event object. - * - * Wraps clSetUserEventStatus(). - */ - cl_int setStatus(cl_int status) - { - return detail::errHandler( - ::clSetUserEventStatus(object_,status), - __SET_USER_EVENT_STATUS_ERR); - } -}; -#endif - -/*! \brief Blocks the calling thread until every event specified is complete. - * - * Wraps clWaitForEvents(). - */ -inline static cl_int -WaitForEvents(const VECTOR_CLASS& events) -{ - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); -} - -/*! \brief Class interface for cl_mem. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_mem as the original. For details, see - * clRetainMemObject() and clReleaseMemObject(). - * - * \see cl_mem - */ -class Memory : public detail::Wrapper -{ -public: - - /*! \brief Destructor. - * - * This calls clReleaseMemObject() on the value held by this instance. - */ - ~Memory() {} - - //! \brief Default constructor - initializes to NULL. - Memory() : detail::Wrapper() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * This calls clRetainMemObject() on the parameter's cl_mem. - */ - Memory(const Memory& memory) : detail::Wrapper(memory) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_mem - * into the new Memory object. - */ - __CL_EXPLICIT_CONSTRUCTORS Memory(const cl_mem& memory) : detail::Wrapper(memory) { } - - /*! \brief Assignment operator from Memory. - * - * This calls clRetainMemObject() on the parameter and clReleaseMemObject() - * on the previous value held by this instance. - */ - Memory& operator = (const Memory& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment operator from cl_mem - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseMemObject() on the value previously held by this instance. - */ - Memory& operator = (const cl_mem& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetMemObjectInfo(). - template - cl_int getInfo(cl_mem_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetMemObjectInfo, object_, name, param), - __GET_MEM_OBJECT_INFO_ERR); - } - - //! \brief Wrapper for clGetMemObjectInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_mem_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_1) - /*! \brief Registers a callback function to be called when the memory object - * is no longer needed. - * - * Wraps clSetMemObjectDestructorCallback(). - * - * Repeated calls to this function, for a given cl_mem value, will append - * to the list of functions called (in reverse order) when memory object's - * resources are freed and the memory object is deleted. - * - * \note - * The registered callbacks are associated with the underlying cl_mem - * value - not the Memory class instance. - */ - cl_int setDestructorCallback( - void (CL_CALLBACK * pfn_notify)(cl_mem, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetMemObjectDestructorCallback( - object_, - pfn_notify, - user_data), - __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); - } -#endif - -}; - -// Pre-declare copy functions -class Buffer; -template< typename IteratorType > -cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); -template< typename IteratorType > -cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); -template< typename IteratorType > -cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); -template< typename IteratorType > -cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); - - -/*! \brief Class interface for Buffer Memory Objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Buffer : public Memory -{ -public: - - /*! \brief Constructs a Buffer in a specified context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - */ - Buffer( - const Context& context, - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Constructs a Buffer in the default context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - * - * \see Context::getDefault() - */ - Buffer( - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - - Context context = Context::getDefault(err); - - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! - * \brief Construct a Buffer from a host container via iterators. - * IteratorType must be random access. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template< typename IteratorType > - Buffer( - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr = false, - cl_int* err = NULL) - { - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if( readOnly ) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if( useHostPtr ) { - flags |= CL_MEM_USE_HOST_PTR; - } - - ::size_t size = sizeof(DataType)*(endIterator - startIterator); - - Context context = Context::getDefault(err); - - if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - if( !useHostPtr ) { - error = cl::copy(startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - } - - /*! - * \brief Construct a Buffer from a host container via iterators using a specified context. - * IteratorType must be random access. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template< typename IteratorType > - Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, - bool readOnly, bool useHostPtr = false, cl_int* err = NULL); - - //! \brief Default constructor - initializes to NULL. - Buffer() : Memory() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - Buffer(const Buffer& buffer) : Memory(buffer) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Buffer(const cl_mem& buffer) : Memory(buffer) { } - - /*! \brief Assignment from Buffer - performs shallow copy. - * - * See Memory for further details. - */ - Buffer& operator = (const Buffer& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Buffer& operator = (const cl_mem& rhs) - { - Memory::operator=(rhs); - return *this; - } - -#if defined(CL_VERSION_1_1) - /*! \brief Creates a new buffer object from this. - * - * Wraps clCreateSubBuffer(). - */ - Buffer createSubBuffer( - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void * buffer_create_info, - cl_int * err = NULL) - { - Buffer result; - cl_int error; - result.object_ = ::clCreateSubBuffer( - object_, - flags, - buffer_create_type, - buffer_create_info, - &error); - - detail::errHandler(error, __CREATE_SUBBUFFER_ERR); - if (err != NULL) { - *err = error; - } - - return result; - } -#endif -}; - -#if defined (USE_DX_INTEROP) -/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. - * - * This is provided to facilitate interoperability with Direct3D. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferD3D10 : public Buffer -{ -public: - typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, - cl_int* errcode_ret); - - /*! \brief Constructs a BufferD3D10, in a specified context, from a - * given ID3D10Buffer. - * - * Wraps clCreateFromD3D10BufferKHR(). - */ - BufferD3D10( - const Context& context, - cl_mem_flags flags, - ID3D10Buffer* bufobj, - cl_int * err = NULL) - { - static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; - -#if defined(CL_VERSION_1_2) - vector props = context.getInfo(); - cl_platform platform = -1; - for( int i = 0; i < props.size(); ++i ) { - if( props[i] == CL_CONTEXT_PLATFORM ) { - platform = props[i+1]; - } - } - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR); -#endif -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); -#endif - - cl_int error; - object_ = pfn_clCreateFromD3D10BufferKHR( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - BufferD3D10() : Buffer() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { } - - /*! \brief Assignment from BufferD3D10 - performs shallow copy. - * - * See Memory for further details. - */ - BufferD3D10& operator = (const BufferD3D10& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferD3D10& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } -}; -#endif - -/*! \brief Class interface for GL Buffer Memory Objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferGL : public Buffer -{ -public: - /*! \brief Constructs a BufferGL in a specified context, from a given - * GL buffer. - * - * Wraps clCreateFromGLBuffer(). - */ - BufferGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLBuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - BufferGL() : Buffer() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - BufferGL(const BufferGL& buffer) : Buffer(buffer) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { } - - /*! \brief Assignment from BufferGL - performs shallow copy. - * - * See Memory for further details. - */ - BufferGL& operator = (const BufferGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferGL& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \brief Class interface for GL Render Buffer Memory Objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferRenderGL : public Buffer -{ -public: - /*! \brief Constructs a BufferRenderGL in a specified context, from a given - * GL Renderbuffer. - * - * Wraps clCreateFromGLRenderbuffer(). - */ - BufferRenderGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLRenderbuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - BufferRenderGL() : Buffer() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Buffer(buffer) { } - - /*! \brief Assignment from BufferGL - performs shallow copy. - * - * See Memory for further details. - */ - BufferRenderGL& operator = (const BufferRenderGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferRenderGL& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \brief C++ base class for Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image : public Memory -{ -protected: - //! \brief Default constructor - initializes to NULL. - Image() : Memory() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - Image(const Image& image) : Memory(image) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image(const cl_mem& image) : Memory(image) { } - - /*! \brief Assignment from Image - performs shallow copy. - * - * See Memory for further details. - */ - Image& operator = (const Image& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image& operator = (const cl_mem& rhs) - { - Memory::operator=(rhs); - return *this; - } - -public: - //! \brief Wrapper for clGetImageInfo(). - template - cl_int getImageInfo(cl_image_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetImageInfo, object_, name, param), - __GET_IMAGE_INFO_ERR); - } - - //! \brief Wrapper for clGetImageInfo() that returns by value. - template typename - detail::param_traits::param_type - getImageInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_image_info, name>::param_type param; - cl_int result = getImageInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -#if defined(CL_VERSION_1_2) -/*! \brief Class interface for 1D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image1D : public Image -{ -public: - /*! \brief Constructs a 1D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image1D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D, - width, - 0, 0, 0, 0, 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - Image1D() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - Image1D(const Image1D& image1D) : Image(image1D) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image1D(const cl_mem& image1D) : Image(image1D) { } - - /*! \brief Assignment from Image1D - performs shallow copy. - * - * See Memory for further details. - */ - Image1D& operator = (const Image1D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image1D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } -}; - -/*! \class Image1DBuffer - * \brief Image interface for 1D buffer images. - */ -class Image1DBuffer : public Image -{ -public: - Image1DBuffer( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - const Buffer &buffer, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_BUFFER, - width, - 0, 0, 0, 0, 0, 0, 0, - buffer() - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - NULL, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image1DBuffer() { } - - Image1DBuffer(const Image1DBuffer& image1D) : Image(image1D) { } - - __CL_EXPLICIT_CONSTRUCTORS Image1DBuffer(const cl_mem& image1D) : Image(image1D) { } - - Image1DBuffer& operator = (const Image1DBuffer& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } - - Image1DBuffer& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } -}; - -/*! \class Image1DArray - * \brief Image interface for arrays of 1D images. - */ -class Image1DArray : public Image -{ -public: - Image1DArray( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t arraySize, - ::size_t width, - ::size_t rowPitch, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_ARRAY, - width, - 0, 0, // height, depth (unused) - arraySize, - rowPitch, - 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image1DArray() { } - - Image1DArray(const Image1DArray& imageArray) : Image(imageArray) { } - - __CL_EXPLICIT_CONSTRUCTORS Image1DArray(const cl_mem& imageArray) : Image(imageArray) { } - - Image1DArray& operator = (const Image1DArray& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } - - Image1DArray& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } -}; -#endif // #if defined(CL_VERSION_1_2) - - -/*! \brief Class interface for 2D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image2D : public Image -{ -public: - /*! \brief Constructs a 1D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image2D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t row_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - bool useCreateImage; - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above - } -#elif defined(CL_VERSION_1_2) - useCreateImage = true; -#else - useCreateImage = false; -#endif - -#if defined(CL_VERSION_1_2) - if (useCreateImage) - { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D, - width, - height, - 0, 0, // depth, array size (unused) - row_pitch, - 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) -#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - if (!useCreateImage) - { - object_ = ::clCreateImage2D( - context(), flags,&format, width, height, row_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE2D_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - } - - //! \brief Default constructor - initializes to NULL. - Image2D() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - Image2D(const Image2D& image2D) : Image(image2D) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image2D(const cl_mem& image2D) : Image(image2D) { } - - /*! \brief Assignment from Image2D - performs shallow copy. - * - * See Memory for further details. - */ - Image2D& operator = (const Image2D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image2D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } -}; - - -#if !defined(CL_VERSION_1_2) -/*! \brief Class interface for GL 2D Image Memory objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. - */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D -{ -public: - /*! \brief Constructs an Image2DGL in a specified context, from a given - * GL Texture. - * - * Wraps clCreateFromGLTexture2D(). - */ - Image2DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture2D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); - if (err != NULL) { - *err = error; - } - - } - - //! \brief Default constructor - initializes to NULL. - Image2DGL() : Image2D() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - Image2DGL(const Image2DGL& image) : Image2D(image) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { } - - /*! \brief Assignment from Image2DGL - performs shallow copy. - * - * See Memory for further details. - */ - Image2DGL& operator = (const Image2DGL& rhs) - { - if (this != &rhs) { - Image2D::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image2DGL& operator = (const cl_mem& rhs) - { - Image2D::operator=(rhs); - return *this; - } -}; -#endif // #if !defined(CL_VERSION_1_2) - -#if defined(CL_VERSION_1_2) -/*! \class Image2DArray - * \brief Image interface for arrays of 2D images. - */ -class Image2DArray : public Image -{ -public: - Image2DArray( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t arraySize, - ::size_t width, - ::size_t height, - ::size_t rowPitch, - ::size_t slicePitch, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D_ARRAY, - width, - height, - 0, // depth (unused) - arraySize, - rowPitch, - slicePitch, - 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2DArray() { } - - Image2DArray(const Image2DArray& imageArray) : Image(imageArray) { } - - __CL_EXPLICIT_CONSTRUCTORS Image2DArray(const cl_mem& imageArray) : Image(imageArray) { } - - Image2DArray& operator = (const Image2DArray& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } - - Image2DArray& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } -}; -#endif // #if defined(CL_VERSION_1_2) - -/*! \brief Class interface for 3D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image3D : public Image -{ -public: - /*! \brief Constructs a 3D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image3D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t depth, - ::size_t row_pitch = 0, - ::size_t slice_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - bool useCreateImage; - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above - } -#elif defined(CL_VERSION_1_2) - useCreateImage = true; -#else - useCreateImage = false; -#endif - -#if defined(CL_VERSION_1_2) - if (useCreateImage) - { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE3D, - width, - height, - depth, - 0, // array size (unused) - row_pitch, - slice_pitch, - 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) -#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - if (!useCreateImage) - { - object_ = ::clCreateImage3D( - context(), flags, &format, width, height, depth, row_pitch, - slice_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE3D_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - } - - //! \brief Default constructor - initializes to NULL. - Image3D() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - Image3D(const Image3D& image3D) : Image(image3D) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image3D(const cl_mem& image3D) : Image(image3D) { } - - /*! \brief Assignment from Image3D - performs shallow copy. - * - * See Memory for further details. - */ - Image3D& operator = (const Image3D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image3D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } -}; - -#if !defined(CL_VERSION_1_2) -/*! \brief Class interface for GL 3D Image Memory objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image3DGL : public Image3D -{ -public: - /*! \brief Constructs an Image3DGL in a specified context, from a given - * GL Texture. - * - * Wraps clCreateFromGLTexture3D(). - */ - Image3DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture3D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - Image3DGL() : Image3D() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - Image3DGL(const Image3DGL& image) : Image3D(image) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { } - - /*! \brief Assignment from Image3DGL - performs shallow copy. - * - * See Memory for further details. - */ - Image3DGL& operator = (const Image3DGL& rhs) - { - if (this != &rhs) { - Image3D::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image3DGL& operator = (const cl_mem& rhs) - { - Image3D::operator=(rhs); - return *this; - } -}; -#endif // #if !defined(CL_VERSION_1_2) - -#if defined(CL_VERSION_1_2) -/*! \class ImageGL - * \brief general image interface for GL interop. - * We abstract the 2D and 3D GL images into a single instance here - * that wraps all GL sourced images on the grounds that setup information - * was performed by OpenCL anyway. - */ -class ImageGL : public Image -{ -public: - ImageGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); - if (err != NULL) { - *err = error; - } - } - - ImageGL() : Image() { } - - ImageGL(const ImageGL& image) : Image(image) { } - - __CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { } - - ImageGL& operator = (const ImageGL& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } - - ImageGL& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } -}; -#endif // #if defined(CL_VERSION_1_2) - -/*! \brief Class interface for cl_sampler. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_sampler as the original. For details, see - * clRetainSampler() and clReleaseSampler(). - * - * \see cl_sampler - */ -class Sampler : public detail::Wrapper -{ -public: - /*! \brief Destructor. - * - * This calls clReleaseSampler() on the value held by this instance. - */ - ~Sampler() { } - - //! \brief Default constructor - initializes to NULL. - Sampler() { } - - /*! \brief Constructs a Sampler in a specified context. - * - * Wraps clCreateSampler(). - */ - Sampler( - const Context& context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateSampler( - context(), - normalized_coords, - addressing_mode, - filter_mode, - &error); - - detail::errHandler(error, __CREATE_SAMPLER_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Copy constructor - performs shallow copy. - * - * This calls clRetainSampler() on the parameter's cl_sampler. - */ - Sampler(const Sampler& sampler) : detail::Wrapper(sampler) { } - - /*! \brief Constructor from cl_sampler - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_sampler - * into the new Sampler object. - */ - Sampler(const cl_sampler& sampler) : detail::Wrapper(sampler) { } - - /*! \brief Assignment operator from Sampler. - * - * This calls clRetainSampler() on the parameter and clReleaseSampler() - * on the previous value held by this instance. - */ - Sampler& operator = (const Sampler& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment operator from cl_sampler - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseSampler() on the value previously held by this instance. - */ - Sampler& operator = (const cl_sampler& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetSamplerInfo(). - template - cl_int getInfo(cl_sampler_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetSamplerInfo, object_, name, param), - __GET_SAMPLER_INFO_ERR); - } - - //! \brief Wrapper for clGetSamplerInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_sampler_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -class Program; -class CommandQueue; -class Kernel; - -//! \brief Class interface for specifying NDRange values. -class NDRange -{ -private: - size_t<3> sizes_; - cl_uint dimensions_; - -public: - //! \brief Default constructor - resulting range has zero dimensions. - NDRange() - : dimensions_(0) - { } - - //! \brief Constructs one-dimensional range. - NDRange(::size_t size0) - : dimensions_(1) - { - sizes_[0] = size0; - } - - //! \brief Constructs two-dimensional range. - NDRange(::size_t size0, ::size_t size1) - : dimensions_(2) - { - sizes_[0] = size0; - sizes_[1] = size1; - } - - //! \brief Constructs three-dimensional range. - NDRange(::size_t size0, ::size_t size1, ::size_t size2) - : dimensions_(3) - { - sizes_[0] = size0; - sizes_[1] = size1; - sizes_[2] = size2; - } - - /*! \brief Conversion operator to const ::size_t *. - * - * \returns a pointer to the size of the first dimension. - */ - operator const ::size_t*() const { - return (const ::size_t*) sizes_; - } - - //! \brief Queries the number of dimensions in the range. - ::size_t dimensions() const { return dimensions_; } -}; - -//! \brief A zero-dimensional range. -static const NDRange NullRange; - -//! \brief Local address wrapper for use with Kernel::setArg -struct LocalSpaceArg -{ - ::size_t size_; -}; - -namespace detail { - -template -struct KernelArgumentHandler -{ - static ::size_t size(const T&) { return sizeof(T); } - static T* ptr(T& value) { return &value; } -}; - -template <> -struct KernelArgumentHandler -{ - static ::size_t size(const LocalSpaceArg& value) { return value.size_; } - static void* ptr(LocalSpaceArg&) { return NULL; } -}; - -} -//! \endcond - -/*! __local - * \brief Helper function for generating LocalSpaceArg objects. - * Deprecated. Replaced with Local. - */ -inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED LocalSpaceArg -__local(::size_t size) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -inline LocalSpaceArg -__local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -/*! Local - * \brief Helper function for generating LocalSpaceArg objects. - */ -inline LocalSpaceArg -Local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -//class KernelFunctor; - -/*! \brief Class interface for cl_kernel. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_kernel as the original. For details, see - * clRetainKernel() and clReleaseKernel(). - * - * \see cl_kernel - */ -class Kernel : public detail::Wrapper -{ -public: - inline Kernel(const Program& program, const char* name, cl_int* err = NULL); - - /*! \brief Destructor. - * - * This calls clReleaseKernel() on the value held by this instance. - */ - ~Kernel() { } - - //! \brief Default constructor - initializes to NULL. - Kernel() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * This calls clRetainKernel() on the parameter's cl_kernel. - */ - Kernel(const Kernel& kernel) : detail::Wrapper(kernel) { } - - /*! \brief Constructor from cl_kernel - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_kernel - * into the new Kernel object. - */ - __CL_EXPLICIT_CONSTRUCTORS Kernel(const cl_kernel& kernel) : detail::Wrapper(kernel) { } - - /*! \brief Assignment operator from Kernel. - * - * This calls clRetainKernel() on the parameter and clReleaseKernel() - * on the previous value held by this instance. - */ - Kernel& operator = (const Kernel& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment operator from cl_kernel - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseKernel() on the value previously held by this instance. - */ - Kernel& operator = (const cl_kernel& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - template - cl_int getInfo(cl_kernel_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelInfo, object_, name, param), - __GET_KERNEL_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_2) - template - cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), - __GET_KERNEL_ARG_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getArgInfo(cl_uint argIndex, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_arg_info, name>::param_type param; - cl_int result = getArgInfo(argIndex, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -#endif // #if defined(CL_VERSION_1_2) - - template - cl_int getWorkGroupInfo( - const Device& device, cl_kernel_work_group_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetKernelWorkGroupInfo, object_, device(), name, param), - __GET_KERNEL_WORK_GROUP_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getWorkGroupInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_work_group_info, name>::param_type param; - cl_int result = getWorkGroupInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int setArg(cl_uint index, T value) - { - return detail::errHandler( - ::clSetKernelArg( - object_, - index, - detail::KernelArgumentHandler::size(value), - detail::KernelArgumentHandler::ptr(value)), - __SET_KERNEL_ARGS_ERR); - } - - cl_int setArg(cl_uint index, ::size_t size, void* argPtr) - { - return detail::errHandler( - ::clSetKernelArg(object_, index, size, argPtr), - __SET_KERNEL_ARGS_ERR); - } -}; - -/*! \class Program - * \brief Program interface that implements cl_program. - */ -class Program : public detail::Wrapper -{ -public: - typedef VECTOR_CLASS > Binaries; - typedef VECTOR_CLASS > Sources; - - Program( - const STRING_CLASS& source, - bool build = false, - cl_int* err = NULL) - { - cl_int error; - - const char * strings = source.c_str(); - const ::size_t length = source.size(); - - Context context = Context::getDefault(err); - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)1, &strings, &length, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - - if (error == CL_SUCCESS && build) { - - error = ::clBuildProgram( - object_, - 0, - NULL, - "", - NULL, - NULL); - - detail::errHandler(error, __BUILD_PROGRAM_ERR); - } - - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const STRING_CLASS& source, - bool build = false, - cl_int* err = NULL) - { - cl_int error; - - const char * strings = source.c_str(); - const ::size_t length = source.size(); - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)1, &strings, &length, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - - if (error == CL_SUCCESS && build) { - - error = ::clBuildProgram( - object_, - 0, - NULL, - "", - NULL, - NULL); - - detail::errHandler(error, __BUILD_PROGRAM_ERR); - } - - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const Sources& sources, - cl_int* err = NULL) - { - cl_int error; - - const ::size_t n = (::size_t)sources.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const char** strings = (const char**) alloca(n * sizeof(const char*)); - - for (::size_t i = 0; i < n; ++i) { - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; - } - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings, lengths, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != NULL) { - *err = error; - } - } - - /** - * Construct a program object from a list of devices and a per-device list of binaries. - * \param context A valid OpenCL context in which to construct the program. - * \param devices A vector of OpenCL device objects for which the program will be created. - * \param binaries A vector of pairs of a pointer to a binary object and its length. - * \param binaryStatus An optional vector that on completion will be resized to - * match the size of binaries and filled with values to specify if each binary - * was successfully loaded. - * Set to CL_SUCCESS if the binary was successfully loaded. - * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL. - * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. - * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors: - * CL_INVALID_CONTEXT if context is not a valid context. - * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; - * or if any entry in binaries is NULL or has length 0. - * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context. - * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. - */ - Program( - const Context& context, - const VECTOR_CLASS& devices, - const Binaries& binaries, - VECTOR_CLASS* binaryStatus = NULL, - cl_int* err = NULL) - { - cl_int error; - - const ::size_t numDevices = devices.size(); - - // Catch size mismatch early and return - if(binaries.size() != numDevices) { - error = CL_INVALID_VALUE; - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - return; - } - - ::size_t* lengths = (::size_t*) alloca(numDevices * sizeof(::size_t)); - const unsigned char** images = (const unsigned char**) alloca(numDevices * sizeof(const unsigned char**)); - - for (::size_t i = 0; i < numDevices; ++i) { - images[i] = (const unsigned char*)binaries[i].first; - lengths[i] = binaries[(int)i].second; - } - - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - if(binaryStatus) { - binaryStatus->resize(numDevices); - } - - object_ = ::clCreateProgramWithBinary( - context(), (cl_uint) devices.size(), - deviceIDs, - lengths, images, binaryStatus != NULL - ? &binaryStatus->front() - : NULL, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - } - - -#if defined(CL_VERSION_1_2) - /** - * Create program using builtin kernels. - * \param kernelNames Semi-colon separated list of builtin kernel names - */ - Program( - const Context& context, - const VECTOR_CLASS& devices, - const STRING_CLASS& kernelNames, - cl_int* err = NULL) - { - cl_int error; - - - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - object_ = ::clCreateProgramWithBuiltInKernels( - context(), - (cl_uint) devices.size(), - deviceIDs, - kernelNames.c_str(), - &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) - - Program() { } - - Program(const Program& program) : detail::Wrapper(program) { } - - __CL_EXPLICIT_CONSTRUCTORS Program(const cl_program& program) : detail::Wrapper(program) { } - - Program& operator = (const Program& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - Program& operator = (const cl_program& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - cl_int build( - const VECTOR_CLASS& devices, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - return detail::errHandler( - ::clBuildProgram( - object_, - (cl_uint) - devices.size(), - deviceIDs, - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - - cl_int build( - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - return detail::errHandler( - ::clBuildProgram( - object_, - 0, - NULL, - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - -#if defined(CL_VERSION_1_2) - cl_int compile( - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - return detail::errHandler( - ::clCompileProgram( - object_, - 0, - NULL, - options, - 0, - NULL, - NULL, - notifyFptr, - data), - __COMPILE_PROGRAM_ERR); - } -#endif - - template - cl_int getInfo(cl_program_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getBuildInfo( - const Device& device, cl_program_build_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetProgramBuildInfo, object_, device(), name, param), - __GET_PROGRAM_BUILD_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getBuildInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - cl_int result = getBuildInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int createKernels(VECTOR_CLASS* kernels) - { - cl_uint numKernels; - cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); - err = ::clCreateKernelsInProgram( - object_, numKernels, (cl_kernel*) value, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - kernels->assign(&value[0], &value[numKernels]); - return CL_SUCCESS; - } -}; - -#if defined(CL_VERSION_1_2) -inline Program linkProgram( - Program input1, - Program input2, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL, - cl_int* err = NULL) -{ - cl_int err_local = CL_SUCCESS; - - cl_program programs[2] = { input1(), input2() }; - - Context ctx = input1.getInfo(); - - cl_program prog = ::clLinkProgram( - ctx(), - 0, - NULL, - options, - 2, - programs, - notifyFptr, - data, - &err_local); - - detail::errHandler(err_local,__COMPILE_PROGRAM_ERR); - if (err != NULL) { - *err = err_local; - } - - return Program(prog); -} - -inline Program linkProgram( - VECTOR_CLASS inputPrograms, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL, - cl_int* err = NULL) -{ - cl_int err_local = CL_SUCCESS; - - cl_program * programs = (cl_program*) alloca(inputPrograms.size() * sizeof(cl_program)); - - if (programs != NULL) { - for (unsigned int i = 0; i < inputPrograms.size(); i++) { - programs[i] = inputPrograms[i](); - } - } - - cl_program prog = ::clLinkProgram( - Context::getDefault()(), - 0, - NULL, - options, - (cl_uint)inputPrograms.size(), - programs, - notifyFptr, - data, - &err_local); - - detail::errHandler(err_local,__COMPILE_PROGRAM_ERR); - if (err != NULL) { - *err = err_local; - } - - return Program(prog); -} -#endif - -template<> -inline VECTOR_CLASS cl::Program::getInfo(cl_int* err) const -{ - VECTOR_CLASS< ::size_t> sizes = getInfo(); - VECTOR_CLASS binaries; - for (VECTOR_CLASS< ::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s) - { - char *ptr = NULL; - if (*s != 0) - ptr = new char[*s]; - binaries.push_back(ptr); - } - - cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries); - if (err != NULL) { - *err = result; - } - return binaries; -} - -inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) -{ - cl_int error; - - object_ = ::clCreateKernel(program(), name, &error); - detail::errHandler(error, __CREATE_KERNEL_ERR); - - if (err != NULL) { - *err = error; - } - -} - -/*! \class CommandQueue - * \brief CommandQueue interface for cl_command_queue. - */ -class CommandQueue : public detail::Wrapper -{ -private: - static volatile int default_initialized_; - static CommandQueue default_; - static volatile cl_int default_error_; -public: - CommandQueue( - cl_command_queue_properties properties, - cl_int* err = NULL) - { - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; - } - } - else { - Device device = context.getInfo()[0]; - - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - } - /*! - * \brief Constructs a CommandQueue for an implementation defined device in the given context - */ - explicit CommandQueue( - const Context& context, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - VECTOR_CLASS devices; - error = context.getInfo(CL_CONTEXT_DEVICES, &devices); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (error != CL_SUCCESS) - { - if (err != NULL) { - *err = error; - } - return; - } - - object_ = ::clCreateCommandQueue(context(), devices[0](), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (err != NULL) { - *err = error; - } - - } - - CommandQueue( - const Context& context, - const Device& device, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - - static CommandQueue getDefault(cl_int * err = NULL) - { - int state = detail::compare_exchange( - &default_initialized_, - __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); - - if (state & __DEFAULT_INITIALIZED) { - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - if (state & __DEFAULT_BEING_INITIALIZED) { - // Assume writes will propagate eventually... - while(default_initialized_ != __DEFAULT_INITIALIZED) { - detail::fence(); - } - - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; - } - } - else { - Device device = context.getInfo()[0]; - - default_ = CommandQueue(context, device, 0, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - - detail::fence(); - - default_error_ = error; - // Assume writes will propagate eventually... - default_initialized_ = __DEFAULT_INITIALIZED; - - detail::fence(); - - if (err != NULL) { - *err = default_error_; - } - return default_; - - } - - CommandQueue() { } - - CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper(commandQueue) { } - - CommandQueue(const cl_command_queue& commandQueue) : detail::Wrapper(commandQueue) { } - - CommandQueue& operator = (const CommandQueue& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - CommandQueue& operator = (const cl_command_queue& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_READ_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_WRITE_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBuffer( - object_, src(), dst(), src_offset, dst_offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQEUE_COPY_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_READ_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_WRITE_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBufferRect( - object_, - src(), - dst(), - (const ::size_t *)src_origin, - (const ::size_t *)dst_origin, - (const ::size_t *)region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQEUE_COPY_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(CL_VERSION_1_2) - /** - * Enqueue a command to fill a buffer object with a pattern - * of a given size. The pattern is specified a as vector. - * \tparam PatternType The datatype of the pattern field. - * The pattern type must be an accepted OpenCL data type. - */ - template - cl_int enqueueFillBuffer( - const Buffer& buffer, - PatternType pattern, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillBuffer( - object_, - buffer(), - static_cast(&pattern), - sizeof(PatternType), - offset, - size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_READ_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_WRITE_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyImage( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *)dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(CL_VERSION_1_2) - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA floating-point color value if - * the image channel data type is not an unnormalized signed or - * unsigned data type. - */ - cl_int enqueueFillImage( - const Image& image, - cl_float4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA signed integer color value if - * the image channel data type is an unnormalized signed integer - * type. - */ - cl_int enqueueFillImage( - const Image& image, - cl_int4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA unsigned integer color value if - * the image channel data type is an unnormalized unsigned integer - * type. - */ - cl_int enqueueFillImage( - const Image& image, - cl_uint4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyImageToBuffer( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *) region, dst_offset, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBufferToImage( - object_, src(), dst(), src_offset, - (const ::size_t *) dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapBuffer( - object_, buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - void* enqueueMapImage( - const Image& buffer, - cl_bool blocking, - cl_map_flags flags, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t * row_pitch, - ::size_t * slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapImage( - object_, buffer(), blocking, flags, - (const ::size_t *) origin, (const ::size_t *) region, - row_pitch, slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - object_, memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(CL_VERSION_1_2) - /** - * Enqueues a marker command which waits for either a list of events to complete, - * or all previously enqueued commands to complete. - * - * Enqueues a marker command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command returns an event which can be waited on, - * i.e. this event can be waited on to insure that all events either in the event_wait_list - * or all previously enqueued commands, queued before this command to command_queue, - * have completed. - */ - cl_int enqueueMarkerWithWaitList( - const VECTOR_CLASS *events = 0, - Event *event = 0) - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueMarkerWithWaitList( - object_, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_MARKER_WAIT_LIST_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * A synchronization point that enqueues a barrier operation. - * - * Enqueues a barrier command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command blocks command execution, that is, any - * following commands enqueued after it do not execute until it completes. This command - * returns an event which can be waited on, i.e. this event can be waited on to insure that - * all events either in the event_wait_list or all previously enqueued commands, queued - * before this command to command_queue, have completed. - */ - cl_int enqueueBarrierWithWaitList( - const VECTOR_CLASS *events = 0, - Event *event = 0) - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueBarrierWithWaitList( - object_, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_BARRIER_WAIT_LIST_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command to indicate with which device a set of memory objects - * should be associated. - */ - cl_int enqueueMigrateMemObjects( - const VECTOR_CLASS &memObjects, - cl_mem_migration_flags flags, - const VECTOR_CLASS* events = NULL, - Event* event = NULL - ) - { - cl_event tmp; - - cl_mem* localMemObjects = static_cast(alloca(memObjects.size() * sizeof(cl_mem))); - for( int i = 0; i < (int)memObjects.size(); ++i ) { - localMemObjects[i] = memObjects[i](); - } - - - cl_int err = detail::errHandler( - ::clEnqueueMigrateMemObjects( - object_, - (cl_uint)memObjects.size(), - static_cast(localMemObjects), - flags, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueNDRangeKernel( - const Kernel& kernel, - const NDRange& offset, - const NDRange& global, - const NDRange& local = NullRange, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueNDRangeKernel( - object_, kernel(), (cl_uint) global.dimensions(), - offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, - (const ::size_t*) global, - local.dimensions() != 0 ? (const ::size_t*) local : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_NDRANGE_KERNEL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueTask( - const Kernel& kernel, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueTask( - object_, kernel(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_TASK_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueNativeKernel( - void (CL_CALLBACK *userFptr)(void *), - std::pair args, - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* mem_locs = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) - ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) - : NULL; - - if (mems != NULL) { - for (unsigned int i = 0; i < mem_objects->size(); i++) { - mems[i] = ((*mem_objects)[i])(); - } - } - - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueNativeKernel( - object_, userFptr, args.first, args.second, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - mems, - (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_NATIVE_KERNEL); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueMarker(object_, (cl_event*) event), - __ENQUEUE_MARKER_ERR); - } - - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueWaitForEvents( - object_, - (cl_uint) events.size(), - (const cl_event*) &events.front()), - __ENQUEUE_WAIT_FOR_EVENTS_ERR); - } -#endif // #if defined(CL_VERSION_1_1) - - cl_int enqueueAcquireGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueAcquireGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_ACQUIRE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReleaseGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_RELEASE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined (USE_DX_INTEROP) -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); - - cl_int enqueueAcquireD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; -#if defined(CL_VERSION_1_2) - cl_context context = getInfo(); - cl::Device device(getInfo()); - cl_platform_id platform = device.getInfo(); - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR); -#endif -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); -#endif - - cl_event tmp; - cl_int err = detail::errHandler( - pfn_clEnqueueAcquireD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_ACQUIRE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; -#if defined(CL_VERSION_1_2) - cl_context context = getInfo(); - cl::Device device(getInfo()); - cl_platform_id platform = device.getInfo(); - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR); -#endif // #if defined(CL_VERSION_1_2) -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); -#endif // #if defined(CL_VERSION_1_1) - - cl_event tmp; - cl_int err = detail::errHandler( - pfn_clEnqueueReleaseD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_RELEASE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueBarrier(object_), - __ENQUEUE_BARRIER_ERR); - } -#endif // #if defined(CL_VERSION_1_1) - - cl_int flush() const - { - return detail::errHandler(::clFlush(object_), __FLUSH_ERR); - } - - cl_int finish() const - { - return detail::errHandler(::clFinish(object_), __FINISH_ERR); - } -}; - -#ifdef _WIN32 -__declspec(selectany) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -__declspec(selectany) CommandQueue CommandQueue::default_; -__declspec(selectany) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; -#else -__attribute__((weak)) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -__attribute__((weak)) CommandQueue CommandQueue::default_; -__attribute__((weak)) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; -#endif - -template< typename IteratorType > -Buffer::Buffer( - const Context &context, - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr, - cl_int* err) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if( readOnly ) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if( useHostPtr ) { - flags |= CL_MEM_USE_HOST_PTR; - } - - ::size_t size = sizeof(DataType)*(endIterator - startIterator); - - if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - if( !useHostPtr ) { - CommandQueue queue(context, 0, &error); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - error = cl::copy(queue, startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } -} - -inline cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - void * result = ::clEnqueueMapBuffer( - queue(), buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - return result; -} - -inline cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (error != CL_SUCCESS) { - return error; - } - - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - queue(), memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; -} - -inline cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses default command queue. - */ -template< typename IteratorType > -inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, startIterator, endIterator, buffer); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses default command queue. - */ -template< typename IteratorType > -inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, buffer, startIterator, endIterator); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses specified queue. - */ -template< typename IteratorType > -inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - ::size_t length = endIterator-startIterator; - ::size_t byteLength = length*sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if( error != CL_SUCCESS ) { - return error; - } -#if defined(_MSC_VER) - std::copy( - startIterator, - endIterator, - stdext::checked_array_iterator( - pointer, length)); -#else - std::copy(startIterator, endIterator, pointer); -#endif - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if( error != CL_SUCCESS ) { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses specified queue. - */ -template< typename IteratorType > -inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - ::size_t length = endIterator-startIterator; - ::size_t byteLength = length*sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if( error != CL_SUCCESS ) { - return error; - } - std::copy(pointer, pointer + length, startIterator); - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if( error != CL_SUCCESS ) { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - -#if defined(CL_VERSION_1_1) -inline cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBufferRect( - src, - dst, - src_origin, - dst_origin, - region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - events, - event); -} -#endif - -inline cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyImage( - src, - dst, - src_origin, - dst_origin, - region, - events, - event); -} - -inline cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyImageToBuffer( - src, - dst, - src_origin, - region, - dst_offset, - events, - event); -} - -inline cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBufferToImage( - src, - dst, - src_offset, - dst_origin, - region, - events, - event); -} - - -inline cl_int flush(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.flush(); -} - -inline cl_int finish(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - - return queue.finish(); -} - -// Kernel Functor support -// New interface as of September 2011 -// Requires the C++11 std::tr1::function (note do not support TR1) -// Visual Studio 2010 and GCC 4.2 - -struct EnqueueArgs -{ - CommandQueue queue_; - const NDRange offset_; - const NDRange global_; - const NDRange local_; - VECTOR_CLASS events_; - - EnqueueArgs(NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) - { - - } - - EnqueueArgs(NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) - { - - } - - EnqueueArgs(NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { - - } - - EnqueueArgs(Event e, NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); - } - - EnqueueArgs(Event e, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) - { - - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local), - events_(events) - { - - } -}; - -namespace detail { - -class NullType {}; - -template -struct SetArg -{ - static void set (Kernel kernel, T0 arg) - { - kernel.setArg(index, arg); - } -}; - -template -struct SetArg -{ - static void set (Kernel, NullType) - { - } -}; - -template < - typename T0, typename T1, typename T2, typename T3, - typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9, typename T10, typename T11, - typename T12, typename T13, typename T14, typename T15, - typename T16, typename T17, typename T18, typename T19, - typename T20, typename T21, typename T22, typename T23, - typename T24, typename T25, typename T26, typename T27, - typename T28, typename T29, typename T30, typename T31 -> -class KernelFunctorGlobal -{ -private: - Kernel kernel_; - -public: - KernelFunctorGlobal( - Kernel kernel) : - kernel_(kernel) - {} - - KernelFunctorGlobal( - const Program& program, - const STRING_CLASS name, - cl_int * err = NULL) : - kernel_(program, name.c_str(), err) - {} - - Event operator() ( - const EnqueueArgs& args, - T0 t0, - T1 t1 = NullType(), - T2 t2 = NullType(), - T3 t3 = NullType(), - T4 t4 = NullType(), - T5 t5 = NullType(), - T6 t6 = NullType(), - T7 t7 = NullType(), - T8 t8 = NullType(), - T9 t9 = NullType(), - T10 t10 = NullType(), - T11 t11 = NullType(), - T12 t12 = NullType(), - T13 t13 = NullType(), - T14 t14 = NullType(), - T15 t15 = NullType(), - T16 t16 = NullType(), - T17 t17 = NullType(), - T18 t18 = NullType(), - T19 t19 = NullType(), - T20 t20 = NullType(), - T21 t21 = NullType(), - T22 t22 = NullType(), - T23 t23 = NullType(), - T24 t24 = NullType(), - T25 t25 = NullType(), - T26 t26 = NullType(), - T27 t27 = NullType(), - T28 t28 = NullType(), - T29 t29 = NullType(), - T30 t30 = NullType(), - T31 t31 = NullType() - ) - { - Event event; - SetArg<0, T0>::set(kernel_, t0); - SetArg<1, T1>::set(kernel_, t1); - SetArg<2, T2>::set(kernel_, t2); - SetArg<3, T3>::set(kernel_, t3); - SetArg<4, T4>::set(kernel_, t4); - SetArg<5, T5>::set(kernel_, t5); - SetArg<6, T6>::set(kernel_, t6); - SetArg<7, T7>::set(kernel_, t7); - SetArg<8, T8>::set(kernel_, t8); - SetArg<9, T9>::set(kernel_, t9); - SetArg<10, T10>::set(kernel_, t10); - SetArg<11, T11>::set(kernel_, t11); - SetArg<12, T12>::set(kernel_, t12); - SetArg<13, T13>::set(kernel_, t13); - SetArg<14, T14>::set(kernel_, t14); - SetArg<15, T15>::set(kernel_, t15); - SetArg<16, T16>::set(kernel_, t16); - SetArg<17, T17>::set(kernel_, t17); - SetArg<18, T18>::set(kernel_, t18); - SetArg<19, T19>::set(kernel_, t19); - SetArg<20, T20>::set(kernel_, t20); - SetArg<21, T21>::set(kernel_, t21); - SetArg<22, T22>::set(kernel_, t22); - SetArg<23, T23>::set(kernel_, t23); - SetArg<24, T24>::set(kernel_, t24); - SetArg<25, T25>::set(kernel_, t25); - SetArg<26, T26>::set(kernel_, t26); - SetArg<27, T27>::set(kernel_, t27); - SetArg<28, T28>::set(kernel_, t28); - SetArg<29, T29>::set(kernel_, t29); - SetArg<30, T30>::set(kernel_, t30); - SetArg<31, T31>::set(kernel_, t31); - - args.queue_.enqueueNDRangeKernel( - kernel_, - args.offset_, - args.global_, - args.local_, - &args.events_, - &event); - - return event; - } - -}; - -//------------------------------------------------------------------------------------------------------ - - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29, - typename T30, - typename T31> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - T31> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 32)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - T31); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29, - T30 arg30, - T31 arg31) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29, - arg30, - arg31); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29, - typename T30> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 31)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29, - T30 arg30) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29, - arg30); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 30)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 29)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 28)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 27)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 26)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 25)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 24)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 23)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 22)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 21)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 20)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 19)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 18)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 17)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 16)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 15)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 14)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 13)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 12)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 11)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 10)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 9)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 8)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 7)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 6)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 5)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 4)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3); - } - - -}; - -template< - typename T0, - typename T1, - typename T2> -struct functionImplementation_ -< T0, - T1, - T2, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 3)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2); - } - - -}; - -template< - typename T0, - typename T1> -struct functionImplementation_ -< T0, - T1, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 2)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1) - { - return functor_( - enqueueArgs, - arg0, - arg1); - } - - -}; - -template< - typename T0> -struct functionImplementation_ -< T0, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 1)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0) - { - return functor_( - enqueueArgs, - arg0); - } - - -}; - - - - - -} // namespace detail - -//---------------------------------------------------------------------------------------------- - -template < - typename T0, typename T1 = detail::NullType, typename T2 = detail::NullType, - typename T3 = detail::NullType, typename T4 = detail::NullType, - typename T5 = detail::NullType, typename T6 = detail::NullType, - typename T7 = detail::NullType, typename T8 = detail::NullType, - typename T9 = detail::NullType, typename T10 = detail::NullType, - typename T11 = detail::NullType, typename T12 = detail::NullType, - typename T13 = detail::NullType, typename T14 = detail::NullType, - typename T15 = detail::NullType, typename T16 = detail::NullType, - typename T17 = detail::NullType, typename T18 = detail::NullType, - typename T19 = detail::NullType, typename T20 = detail::NullType, - typename T21 = detail::NullType, typename T22 = detail::NullType, - typename T23 = detail::NullType, typename T24 = detail::NullType, - typename T25 = detail::NullType, typename T26 = detail::NullType, - typename T27 = detail::NullType, typename T28 = detail::NullType, - typename T29 = detail::NullType, typename T30 = detail::NullType, - typename T31 = detail::NullType -> -struct make_kernel : - public detail::functionImplementation_< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - > -{ -public: - typedef detail::KernelFunctorGlobal< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - > FunctorType; - - make_kernel( - const Program& program, - const STRING_CLASS name, - cl_int * err = NULL) : - detail::functionImplementation_< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - >( - FunctorType(program, name, err)) - {} - - make_kernel( - const Kernel kernel) : - detail::functionImplementation_< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - >( - FunctorType(kernel)) - {} -}; - - -//---------------------------------------------------------------------------------------------------------------------- - -#undef __ERR_STR -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#undef __GET_DEVICE_INFO_ERR -#undef __GET_PLATFORM_INFO_ERR -#undef __GET_DEVICE_IDS_ERR -#undef __GET_CONTEXT_INFO_ERR -#undef __GET_EVENT_INFO_ERR -#undef __GET_EVENT_PROFILE_INFO_ERR -#undef __GET_MEM_OBJECT_INFO_ERR -#undef __GET_IMAGE_INFO_ERR -#undef __GET_SAMPLER_INFO_ERR -#undef __GET_KERNEL_INFO_ERR -#undef __GET_KERNEL_ARG_INFO_ERR -#undef __GET_KERNEL_WORK_GROUP_INFO_ERR -#undef __GET_PROGRAM_INFO_ERR -#undef __GET_PROGRAM_BUILD_INFO_ERR -#undef __GET_COMMAND_QUEUE_INFO_ERR - -#undef __CREATE_CONTEXT_ERR -#undef __CREATE_CONTEXT_FROM_TYPE_ERR -#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR - -#undef __CREATE_BUFFER_ERR -#undef __CREATE_SUBBUFFER_ERR -#undef __CREATE_IMAGE2D_ERR -#undef __CREATE_IMAGE3D_ERR -#undef __CREATE_SAMPLER_ERR -#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR - -#undef __CREATE_USER_EVENT_ERR -#undef __SET_USER_EVENT_STATUS_ERR -#undef __SET_EVENT_CALLBACK_ERR -#undef __SET_PRINTF_CALLBACK_ERR - -#undef __WAIT_FOR_EVENTS_ERR - -#undef __CREATE_KERNEL_ERR -#undef __SET_KERNEL_ARGS_ERR -#undef __CREATE_PROGRAM_WITH_SOURCE_ERR -#undef __CREATE_PROGRAM_WITH_BINARY_ERR -#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR -#undef __BUILD_PROGRAM_ERR -#undef __CREATE_KERNELS_IN_PROGRAM_ERR - -#undef __CREATE_COMMAND_QUEUE_ERR -#undef __SET_COMMAND_QUEUE_PROPERTY_ERR -#undef __ENQUEUE_READ_BUFFER_ERR -#undef __ENQUEUE_WRITE_BUFFER_ERR -#undef __ENQUEUE_READ_BUFFER_RECT_ERR -#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR -#undef __ENQEUE_COPY_BUFFER_ERR -#undef __ENQEUE_COPY_BUFFER_RECT_ERR -#undef __ENQUEUE_READ_IMAGE_ERR -#undef __ENQUEUE_WRITE_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR -#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR -#undef __ENQUEUE_MAP_BUFFER_ERR -#undef __ENQUEUE_MAP_IMAGE_ERR -#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR -#undef __ENQUEUE_NDRANGE_KERNEL_ERR -#undef __ENQUEUE_TASK_ERR -#undef __ENQUEUE_NATIVE_KERNEL - -#undef __CL_EXPLICIT_CONSTRUCTORS - -#undef __UNLOAD_COMPILER_ERR -#endif //__CL_USER_OVERRIDE_ERROR_STRINGS - -#undef __CL_FUNCTION_TYPE - -// Extensions -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_VERSION_1_1) -#undef __INIT_CL_EXT_FCN_PTR -#endif // #if defined(CL_VERSION_1_1) -#undef __CREATE_SUB_DEVICES - -#if defined(USE_CL_DEVICE_FISSION) -#undef __PARAM_NAME_DEVICE_FISSION -#endif // USE_CL_DEVICE_FISSION - -#undef __DEFAULT_NOT_INITIALIZED -#undef __DEFAULT_BEING_INITIALIZED -#undef __DEFAULT_INITIALIZED - -} // namespace cl - -#ifdef _WIN32 -#pragma pop_macro("max") -#endif // _WIN32 - -#endif // CL_HPP_ diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp deleted file mode 100644 index 11b29333c..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp +++ /dev/null @@ -1,754 +0,0 @@ -/* - This file is part of c-ethash. - - c-ethash is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - c-ethash is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with cpp-ethereum. If not, see . -*/ -/** @file ethash_cl_miner.cpp -* @author Tim Hughes -* @date 2015 -*/ - - -#define _CRT_SECURE_NO_WARNINGS - -#include -#include -#include "ethash_cl_miner.h" -#include - -#undef min -#undef max - -#define HASH_BYTES 32 - -static char const ethash_inner_code[] = R"( - -// author Tim Hughes -// Tested on Radeon HD 7850 -// Hashrate: 15940347 hashes/s -// Bandwidth: 124533 MB/s -// search kernel should fit in <= 84 VGPRS (3 wavefronts) - -#define THREADS_PER_HASH (128 / 16) -#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH) - -#define FNV_PRIME 0x01000193 - -__constant uint2 const Keccak_f1600_RC[24] = { - (uint2)(0x00000001, 0x00000000), - (uint2)(0x00008082, 0x00000000), - (uint2)(0x0000808a, 0x80000000), - (uint2)(0x80008000, 0x80000000), - (uint2)(0x0000808b, 0x00000000), - (uint2)(0x80000001, 0x00000000), - (uint2)(0x80008081, 0x80000000), - (uint2)(0x00008009, 0x80000000), - (uint2)(0x0000008a, 0x00000000), - (uint2)(0x00000088, 0x00000000), - (uint2)(0x80008009, 0x00000000), - (uint2)(0x8000000a, 0x00000000), - (uint2)(0x8000808b, 0x00000000), - (uint2)(0x0000008b, 0x80000000), - (uint2)(0x00008089, 0x80000000), - (uint2)(0x00008003, 0x80000000), - (uint2)(0x00008002, 0x80000000), - (uint2)(0x00000080, 0x80000000), - (uint2)(0x0000800a, 0x00000000), - (uint2)(0x8000000a, 0x80000000), - (uint2)(0x80008081, 0x80000000), - (uint2)(0x00008080, 0x80000000), - (uint2)(0x80000001, 0x00000000), - (uint2)(0x80008008, 0x80000000), -}; - -void keccak_f1600_round(uint2* a, uint r, uint out_size) -{ - #if !__ENDIAN_LITTLE__ - for (uint i = 0; i != 25; ++i) - a[i] = a[i].yx; - #endif - - uint2 b[25]; - uint2 t; - - // Theta - b[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]; - b[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]; - b[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]; - b[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]; - b[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]; - t = b[4] ^ (uint2)(b[1].x << 1 | b[1].y >> 31, b[1].y << 1 | b[1].x >> 31); - a[0] ^= t; - a[5] ^= t; - a[10] ^= t; - a[15] ^= t; - a[20] ^= t; - t = b[0] ^ (uint2)(b[2].x << 1 | b[2].y >> 31, b[2].y << 1 | b[2].x >> 31); - a[1] ^= t; - a[6] ^= t; - a[11] ^= t; - a[16] ^= t; - a[21] ^= t; - t = b[1] ^ (uint2)(b[3].x << 1 | b[3].y >> 31, b[3].y << 1 | b[3].x >> 31); - a[2] ^= t; - a[7] ^= t; - a[12] ^= t; - a[17] ^= t; - a[22] ^= t; - t = b[2] ^ (uint2)(b[4].x << 1 | b[4].y >> 31, b[4].y << 1 | b[4].x >> 31); - a[3] ^= t; - a[8] ^= t; - a[13] ^= t; - a[18] ^= t; - a[23] ^= t; - t = b[3] ^ (uint2)(b[0].x << 1 | b[0].y >> 31, b[0].y << 1 | b[0].x >> 31); - a[4] ^= t; - a[9] ^= t; - a[14] ^= t; - a[19] ^= t; - a[24] ^= t; - - // Rho Pi - b[0] = a[0]; - b[10] = (uint2)(a[1].x << 1 | a[1].y >> 31, a[1].y << 1 | a[1].x >> 31); - b[7] = (uint2)(a[10].x << 3 | a[10].y >> 29, a[10].y << 3 | a[10].x >> 29); - b[11] = (uint2)(a[7].x << 6 | a[7].y >> 26, a[7].y << 6 | a[7].x >> 26); - b[17] = (uint2)(a[11].x << 10 | a[11].y >> 22, a[11].y << 10 | a[11].x >> 22); - b[18] = (uint2)(a[17].x << 15 | a[17].y >> 17, a[17].y << 15 | a[17].x >> 17); - b[3] = (uint2)(a[18].x << 21 | a[18].y >> 11, a[18].y << 21 | a[18].x >> 11); - b[5] = (uint2)(a[3].x << 28 | a[3].y >> 4, a[3].y << 28 | a[3].x >> 4); - b[16] = (uint2)(a[5].y << 4 | a[5].x >> 28, a[5].x << 4 | a[5].y >> 28); - b[8] = (uint2)(a[16].y << 13 | a[16].x >> 19, a[16].x << 13 | a[16].y >> 19); - b[21] = (uint2)(a[8].y << 23 | a[8].x >> 9, a[8].x << 23 | a[8].y >> 9); - b[24] = (uint2)(a[21].x << 2 | a[21].y >> 30, a[21].y << 2 | a[21].x >> 30); - b[4] = (uint2)(a[24].x << 14 | a[24].y >> 18, a[24].y << 14 | a[24].x >> 18); - b[15] = (uint2)(a[4].x << 27 | a[4].y >> 5, a[4].y << 27 | a[4].x >> 5); - b[23] = (uint2)(a[15].y << 9 | a[15].x >> 23, a[15].x << 9 | a[15].y >> 23); - b[19] = (uint2)(a[23].y << 24 | a[23].x >> 8, a[23].x << 24 | a[23].y >> 8); - b[13] = (uint2)(a[19].x << 8 | a[19].y >> 24, a[19].y << 8 | a[19].x >> 24); - b[12] = (uint2)(a[13].x << 25 | a[13].y >> 7, a[13].y << 25 | a[13].x >> 7); - b[2] = (uint2)(a[12].y << 11 | a[12].x >> 21, a[12].x << 11 | a[12].y >> 21); - b[20] = (uint2)(a[2].y << 30 | a[2].x >> 2, a[2].x << 30 | a[2].y >> 2); - b[14] = (uint2)(a[20].x << 18 | a[20].y >> 14, a[20].y << 18 | a[20].x >> 14); - b[22] = (uint2)(a[14].y << 7 | a[14].x >> 25, a[14].x << 7 | a[14].y >> 25); - b[9] = (uint2)(a[22].y << 29 | a[22].x >> 3, a[22].x << 29 | a[22].y >> 3); - b[6] = (uint2)(a[9].x << 20 | a[9].y >> 12, a[9].y << 20 | a[9].x >> 12); - b[1] = (uint2)(a[6].y << 12 | a[6].x >> 20, a[6].x << 12 | a[6].y >> 20); - - // Chi - a[0] = bitselect(b[0] ^ b[2], b[0], b[1]); - a[1] = bitselect(b[1] ^ b[3], b[1], b[2]); - a[2] = bitselect(b[2] ^ b[4], b[2], b[3]); - a[3] = bitselect(b[3] ^ b[0], b[3], b[4]); - if (out_size >= 4) - { - a[4] = bitselect(b[4] ^ b[1], b[4], b[0]); - a[5] = bitselect(b[5] ^ b[7], b[5], b[6]); - a[6] = bitselect(b[6] ^ b[8], b[6], b[7]); - a[7] = bitselect(b[7] ^ b[9], b[7], b[8]); - a[8] = bitselect(b[8] ^ b[5], b[8], b[9]); - if (out_size >= 8) - { - a[9] = bitselect(b[9] ^ b[6], b[9], b[5]); - a[10] = bitselect(b[10] ^ b[12], b[10], b[11]); - a[11] = bitselect(b[11] ^ b[13], b[11], b[12]); - a[12] = bitselect(b[12] ^ b[14], b[12], b[13]); - a[13] = bitselect(b[13] ^ b[10], b[13], b[14]); - a[14] = bitselect(b[14] ^ b[11], b[14], b[10]); - a[15] = bitselect(b[15] ^ b[17], b[15], b[16]); - a[16] = bitselect(b[16] ^ b[18], b[16], b[17]); - a[17] = bitselect(b[17] ^ b[19], b[17], b[18]); - a[18] = bitselect(b[18] ^ b[15], b[18], b[19]); - a[19] = bitselect(b[19] ^ b[16], b[19], b[15]); - a[20] = bitselect(b[20] ^ b[22], b[20], b[21]); - a[21] = bitselect(b[21] ^ b[23], b[21], b[22]); - a[22] = bitselect(b[22] ^ b[24], b[22], b[23]); - a[23] = bitselect(b[23] ^ b[20], b[23], b[24]); - a[24] = bitselect(b[24] ^ b[21], b[24], b[20]); - } - } - - // Iota - a[0] ^= Keccak_f1600_RC[r]; - - #if !__ENDIAN_LITTLE__ - for (uint i = 0; i != 25; ++i) - a[i] = a[i].yx; - #endif -} - -void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate) -{ - for (uint i = in_size; i != 25; ++i) - { - a[i] = 0; - } -#if __ENDIAN_LITTLE__ - a[in_size] ^= 0x0000000000000001; - a[24-out_size*2] ^= 0x8000000000000000; -#else - a[in_size] ^= 0x0100000000000000; - a[24-out_size*2] ^= 0x0000000000000080; -#endif - - // Originally I unrolled the first and last rounds to interface - // better with surrounding code, however I haven't done this - // without causing the AMD compiler to blow up the VGPR usage. - uint r = 0; - do - { - // This dynamic branch stops the AMD compiler unrolling the loop - // and additionally saves about 33% of the VGPRs, enough to gain another - // wavefront. Ideally we'd get 4 in flight, but 3 is the best I can - // massage out of the compiler. It doesn't really seem to matter how - // much we try and help the compiler save VGPRs because it seems to throw - // that information away, hence the implementation of keccak here - // doesn't bother. - if (isolate) - { - keccak_f1600_round((uint2*)a, r++, 25); - } - } - while (r < 23); - - // final round optimised for digest size - keccak_f1600_round((uint2*)a, r++, out_size); -} - -#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; } - -#define countof(x) (sizeof(x) / sizeof(x[0])) - -uint fnv(uint x, uint y) -{ - return x * FNV_PRIME ^ y; -} - -uint4 fnv4(uint4 x, uint4 y) -{ - return x * FNV_PRIME ^ y; -} - -uint fnv_reduce(uint4 v) -{ - return fnv(fnv(fnv(v.x, v.y), v.z), v.w); -} - -typedef union -{ - ulong ulongs[32 / sizeof(ulong)]; - uint uints[32 / sizeof(uint)]; -} hash32_t; - -typedef union -{ - ulong ulongs[64 / sizeof(ulong)]; - uint4 uint4s[64 / sizeof(uint4)]; -} hash64_t; - -typedef union -{ - uint uints[128 / sizeof(uint)]; - uint4 uint4s[128 / sizeof(uint4)]; -} hash128_t; - -hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate) -{ - hash64_t init; - uint const init_size = countof(init.ulongs); - uint const hash_size = countof(header->ulongs); - - // sha3_512(header .. nonce) - ulong state[25]; - copy(state, header->ulongs, hash_size); - state[hash_size] = nonce; - keccak_f1600_no_absorb(state, hash_size + 1, init_size, isolate); - - copy(init.ulongs, state, init_size); - return init; -} - -uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate) -{ - uint4 mix = init; - - // share init0 - if (thread_id == 0) - *share = mix.x; - barrier(CLK_LOCAL_MEM_FENCE); - uint init0 = *share; - - uint a = 0; - do - { - bool update_share = thread_id == (a/4) % THREADS_PER_HASH; - - #pragma unroll - for (uint i = 0; i != 4; ++i) - { - if (update_share) - { - uint m[4] = { mix.x, mix.y, mix.z, mix.w }; - *share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE; - } - barrier(CLK_LOCAL_MEM_FENCE); - - mix = fnv4(mix, g_dag[*share].uint4s[thread_id]); - } - } - while ((a += 4) != (ACCESSES & isolate)); - - return fnv_reduce(mix); -} - -hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate) -{ - ulong state[25]; - - hash32_t hash; - uint const hash_size = countof(hash.ulongs); - uint const init_size = countof(init->ulongs); - uint const mix_size = countof(mix->ulongs); - - // keccak_256(keccak_512(header..nonce) .. mix); - copy(state, init->ulongs, init_size); - copy(state + init_size, mix->ulongs, mix_size); - keccak_f1600_no_absorb(state, init_size+mix_size, hash_size, isolate); - - // copy out - copy(hash.ulongs, state, hash_size); - return hash; -} - -hash32_t compute_hash_simple( - __constant hash32_t const* g_header, - __global hash128_t const* g_dag, - ulong nonce, - uint isolate - ) -{ - hash64_t init = init_hash(g_header, nonce, isolate); - - hash128_t mix; - for (uint i = 0; i != countof(mix.uint4s); ++i) - { - mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)]; - } - - uint mix_val = mix.uints[0]; - uint init0 = mix.uints[0]; - uint a = 0; - do - { - uint pi = fnv(init0 ^ a, mix_val) % DAG_SIZE; - uint n = (a+1) % countof(mix.uints); - - #pragma unroll - for (uint i = 0; i != countof(mix.uints); ++i) - { - mix.uints[i] = fnv(mix.uints[i], g_dag[pi].uints[i]); - mix_val = i == n ? mix.uints[i] : mix_val; - } - } - while (++a != (ACCESSES & isolate)); - - // reduce to output - hash32_t fnv_mix; - for (uint i = 0; i != countof(fnv_mix.uints); ++i) - { - fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]); - } - - return final_hash(&init, &fnv_mix, isolate); -} - -typedef union -{ - struct - { - hash64_t init; - uint pad; // avoid lds bank conflicts - }; - hash32_t mix; -} compute_hash_share; - -hash32_t compute_hash( - __local compute_hash_share* share, - __constant hash32_t const* g_header, - __global hash128_t const* g_dag, - ulong nonce, - uint isolate - ) -{ - uint const gid = get_global_id(0); - - // Compute one init hash per work item. - hash64_t init = init_hash(g_header, nonce, isolate); - - // Threads work together in this phase in groups of 8. - uint const thread_id = gid % THREADS_PER_HASH; - uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH; - - hash32_t mix; - uint i = 0; - do - { - // share init with other threads - if (i == thread_id) - share[hash_id].init = init; - barrier(CLK_LOCAL_MEM_FENCE); - - uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))]; - barrier(CLK_LOCAL_MEM_FENCE); - - uint thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uints, g_dag, isolate); - - share[hash_id].mix.uints[thread_id] = thread_mix; - barrier(CLK_LOCAL_MEM_FENCE); - - if (i == thread_id) - mix = share[hash_id].mix; - barrier(CLK_LOCAL_MEM_FENCE); - } - while (++i != (THREADS_PER_HASH & isolate)); - - return final_hash(&init, &mix, isolate); -} - -__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) -__kernel void ethash_hash_simple( - __global hash32_t* g_hashes, - __constant hash32_t const* g_header, - __global hash128_t const* g_dag, - ulong start_nonce, - uint isolate - ) -{ - uint const gid = get_global_id(0); - g_hashes[gid] = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate); -} - -__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) -__kernel void ethash_search_simple( - __global volatile uint* restrict g_output, - __constant hash32_t const* g_header, - __global hash128_t const* g_dag, - ulong start_nonce, - ulong target, - uint isolate - ) -{ - uint const gid = get_global_id(0); - hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate); - - if (hash.ulongs[countof(hash.ulongs)-1] < target) - { - uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1); - g_output[slot] = gid; - } -} - -__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) -__kernel void ethash_hash( - __global hash32_t* g_hashes, - __constant hash32_t const* g_header, - __global hash128_t const* g_dag, - ulong start_nonce, - uint isolate - ) -{ - __local compute_hash_share share[HASHES_PER_LOOP]; - - uint const gid = get_global_id(0); - g_hashes[gid] = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate); -} - -__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) -__kernel void ethash_search( - __global volatile uint* restrict g_output, - __constant hash32_t const* g_header, - __global hash128_t const* g_dag, - ulong start_nonce, - ulong target, - uint isolate - ) -{ - __local compute_hash_share share[HASHES_PER_LOOP]; - - uint const gid = get_global_id(0); - hash32_t hash = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate); - - if (hash.ulongs[countof(hash.ulongs)-1] < target) - { - uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1); - g_output[slot] = gid; - } -} - -)"; - -static void add_definition(std::string& source, char const* id, unsigned value) -{ - char buf[256]; - sprintf(buf, "#define %s %uu\n", id, value); - source.insert(source.begin(), buf, buf + strlen(buf)); -} - -ethash_cl_miner::ethash_cl_miner() -{ -} - -bool ethash_cl_miner::init(ethash_params const& params, const uint8_t seed[32], unsigned workgroup_size) -{ - // store params - m_params = params; - - // get all platforms - std::vector platforms; - cl::Platform::get(&platforms); - if (platforms.empty()) - { - debugf("No OpenCL platforms found.\n"); - return false; - } - - // use default platform - debugf("Using platform: %s\n", platforms[0].getInfo().c_str()); - - // get GPU device of the default platform - std::vector devices; - platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices); - if (devices.empty()) - { - debugf("No OpenCL devices found.\n"); - return false; - } - - // use default device - cl::Device& device = devices[0]; - debugf("Using device: %s\n", device.getInfo().c_str()); - - // create context - m_context = cl::Context({device}); - m_queue = cl::CommandQueue(m_context, device); - - // use requested workgroup size, but we require multiple of 8 - m_workgroup_size = ((workgroup_size + 7) / 8) * 8; - - // patch source code - std::string code = ethash_inner_code; - add_definition(code, "GROUP_SIZE", m_workgroup_size); - add_definition(code, "DAG_SIZE", (unsigned)(params.full_size / MIX_BYTES)); - add_definition(code, "ACCESSES", ACCESSES); - add_definition(code, "MAX_OUTPUTS", c_max_search_results); - //debugf("%s", code.c_str()); - - // create miner OpenCL program - cl::Program::Sources sources; - sources.push_back({code.c_str(), code.size()}); - - cl::Program program(m_context, sources); - try - { - program.build({device}); - } - catch (cl::Error err) - { - debugf("%s\n", program.getBuildInfo(device).c_str()); - return false; - } - m_hash_kernel = cl::Kernel(program, "ethash_hash"); - m_search_kernel = cl::Kernel(program, "ethash_search"); - - // create buffer for dag - m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, params.full_size); - - // create buffer for header - m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32); - - // compute dag on CPU - { - void* cache_mem = malloc(params.cache_size + 63); - ethash_cache cache; - cache.mem = (void*)(((uintptr_t)cache_mem + 63) & ~63); - ethash_mkcache(&cache, ¶ms, seed); - - // if this throws then it's because we probably need to subdivide the dag uploads for compatibility - void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, CL_MAP_WRITE_INVALIDATE_REGION, 0, params.full_size); - ethash_compute_full_data(dag_ptr, ¶ms, &cache); - m_queue.enqueueUnmapMemObject(m_dag, dag_ptr); - - free(cache_mem); - } - - // create mining buffers - for (unsigned i = 0; i != c_num_buffers; ++i) - { - m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY, 32*c_hash_batch_size); - m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t)); - } - return true; -} - -void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count) -{ - struct pending_batch - { - unsigned base; - unsigned count; - unsigned buf; - }; - std::queue pending; - - // update header constant buffer - m_queue.enqueueWriteBuffer(m_header, true, 0, 32, header); - - /* - __kernel void ethash_combined_hash( - __global hash32_t* g_hashes, - __constant hash32_t const* g_header, - __global hash128_t const* g_dag, - ulong start_nonce, - uint isolate - ) - */ - m_hash_kernel.setArg(1, m_header); - m_hash_kernel.setArg(2, m_dag); - m_hash_kernel.setArg(3, nonce); - m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop - - unsigned buf = 0; - for (unsigned i = 0; i < count || !pending.empty(); ) - { - // how many this batch - if (i < count) - { - unsigned const this_count = std::min(count - i, c_hash_batch_size); - unsigned const batch_count = std::max(this_count, m_workgroup_size); - - // supply output hash buffer to kernel - m_hash_kernel.setArg(0, m_hash_buf[buf]); - - // execute it! - clock_t start_time = clock(); - m_queue.enqueueNDRangeKernel( - m_hash_kernel, - cl::NullRange, - cl::NDRange(batch_count), - cl::NDRange(m_workgroup_size) - ); - m_queue.flush(); - - pending.push({i, this_count, buf}); - i += this_count; - buf = (buf + 1) % c_num_buffers; - } - - // read results - if (i == count || pending.size() == c_num_buffers) - { - pending_batch const& batch = pending.front(); - - // could use pinned host pointer instead, but this path isn't that important. - uint8_t* hashes = (uint8_t*)m_queue.enqueueMapBuffer(m_hash_buf[batch.buf], true, CL_MAP_READ, 0, batch.count * HASH_BYTES); - memcpy(ret + batch.base*HASH_BYTES, hashes, batch.count*HASH_BYTES); - m_queue.enqueueUnmapMemObject(m_hash_buf[batch.buf], hashes); - - pending.pop(); - } - } -} - - -void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook) -{ - struct pending_batch - { - uint64_t start_nonce; - unsigned buf; - }; - std::queue pending; - - static uint32_t const c_zero = 0; - - // update header constant buffer - m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header); - for (unsigned i = 0; i != c_num_buffers; ++i) - { - m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero); - } - cl::Event pre_return_event; - m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event); - - /* - __kernel void ethash_combined_search( - __global hash32_t* g_hashes, // 0 - __constant hash32_t const* g_header, // 1 - __global hash128_t const* g_dag, // 2 - ulong start_nonce, // 3 - ulong target, // 4 - uint isolate // 5 - ) - */ - m_search_kernel.setArg(1, m_header); - m_search_kernel.setArg(2, m_dag); - - // pass these to stop the compiler unrolling the loops - m_search_kernel.setArg(4, target); - m_search_kernel.setArg(5, ~0u); - - - unsigned buf = 0; - for (uint64_t start_nonce = 0; ; start_nonce += c_search_batch_size) - { - // supply output buffer to kernel - m_search_kernel.setArg(0, m_search_buf[buf]); - m_search_kernel.setArg(3, start_nonce); - - // execute it! - m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size); - - pending.push({start_nonce, buf}); - buf = (buf + 1) % c_num_buffers; - - // read results - if (pending.size() == c_num_buffers) - { - pending_batch const& batch = pending.front(); - - // could use pinned host pointer instead - uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1+c_max_search_results) * sizeof(uint32_t)); - unsigned num_found = std::min(results[0], c_max_search_results); - - uint64_t nonces[c_max_search_results]; - for (unsigned i = 0; i != num_found; ++i) - { - nonces[i] = batch.start_nonce + results[i+1]; - } - - m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results); - - bool exit = num_found && hook.found(nonces, num_found); - exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit - if (exit) - break; - - pending.pop(); - } - } - - // not safe to return until this is ready - pre_return_event.wait(); -} - diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h deleted file mode 100644 index f37100d91..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -#define __CL_ENABLE_EXCEPTIONS -#define CL_USE_DEPRECATED_OPENCL_2_0_APIS -#include "cl.hpp" -#include -#include - -class ethash_cl_miner -{ -public: - struct search_hook - { - // reports progress, return true to abort - virtual bool found(uint64_t const* nonces, uint32_t count) = 0; - virtual bool searched(uint64_t start_nonce, uint32_t count) = 0; - }; - -public: - ethash_cl_miner(); - - bool init(ethash_params const& params, const uint8_t seed[32], unsigned workgroup_size = 64); - - void hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count); - void search(uint8_t const* header, uint64_t target, search_hook& hook); - -private: - static unsigned const c_max_search_results = 63; - static unsigned const c_num_buffers = 2; - static unsigned const c_hash_batch_size = 1024; - static unsigned const c_search_batch_size = 1024*256; - - ethash_params m_params; - cl::Context m_context; - cl::CommandQueue m_queue; - cl::Kernel m_hash_kernel; - cl::Kernel m_search_kernel; - cl::Buffer m_dag; - cl::Buffer m_header; - cl::Buffer m_hash_buf[c_num_buffers]; - cl::Buffer m_search_buf[c_num_buffers]; - unsigned m_workgroup_size; -}; \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt deleted file mode 100644 index b30ed3e2d..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -find_package(CUDA) - -# Pass options to NVCC - - -if (CUDA_FOUND) -set(CUDA_NVCC_FLAGS " -gencode;arch=compute_30,code=sm_30; - -gencode;arch=compute_20,code=sm_20; - -gencode;arch=compute_11,code=sm_11; - -gencode;arch=compute_12,code=sm_12; - -gencode;arch=compute_13,code=sm_13;") -cuda_add_executable( - ethash-cuda - libethash.cu) -endif() \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu deleted file mode 100644 index f06653f2d..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu +++ /dev/null @@ -1,879 +0,0 @@ -/* - Copyright 2009 NVIDIA Corporation. All rights reserved. - - NOTICE TO LICENSEE: - - This source code and/or documentation ("Licensed Deliverables") are subject - to NVIDIA intellectual property rights under U.S. and international Copyright - laws. - - These Licensed Deliverables contained herein is PROPRIETARY and CONFIDENTIAL - to NVIDIA and is being provided under the terms and conditions of a form of - NVIDIA software license agreement by and between NVIDIA and Licensee ("License - Agreement") or electronically accepted by Licensee. Notwithstanding any terms - or conditions to the contrary in the License Agreement, reproduction or - disclosure of the Licensed Deliverables to any third party without the express - written consent of NVIDIA is prohibited. - - NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, - NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THESE LICENSED - DELIVERABLES FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED - WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE - LICENSED DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. NOTWITHSTANDING ANY - TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, IN NO EVENT SHALL - NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, - OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER - IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THESE LICENSED DELIVERABLES. - - U.S. Government End Users. These Licensed Deliverables are a "commercial item" - as that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of - "commercial computer software" and "commercial computer software documentation" - as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the - U.S. Government only as a commercial end item. Consistent with 48 C.F.R.12.212 - and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all U.S. Government - End Users acquire the Licensed Deliverables with only those rights set forth - herein. - - Any use of the Licensed Deliverables in individual and commercial software must - include, in the user documentation and internal comments to the code, the above - Disclaimer and U.S. Government End Users Notice. - */ - -/* - * cuPrintf.cu - * - * This is a printf command callable from within a kernel. It is set - * up so that output is sent to a memory buffer, which is emptied from - * the host side - but only after a cudaThreadSynchronize() on the host. - * - * Currently, there is a limitation of around 200 characters of output - * and no more than 10 arguments to a single cuPrintf() call. Issue - * multiple calls if longer format strings are required. - * - * It requires minimal setup, and is *NOT* optimised for performance. - * For example, writes are not coalesced - this is because there is an - * assumption that people will not want to printf from every single one - * of thousands of threads, but only from individual threads at a time. - * - * Using this is simple - it requires one host-side call to initialise - * everything, and then kernels can call cuPrintf at will. Sample code - * is the easiest way to demonstrate: - * - #include "cuPrintf.cu" - - __global__ void testKernel(int val) - { - cuPrintf("Value is: %d\n", val); - } - - int main() - { - cudaPrintfInit(); - testKernel<<< 2, 3 >>>(10); - cudaPrintfDisplay(stdout, true); - cudaPrintfEnd(); - return 0; - } - * - * See the header file, "cuPrintf.cuh" for more info, especially - * arguments to cudaPrintfInit() and cudaPrintfDisplay(); - */ - -#ifndef CUPRINTF_CU -#define CUPRINTF_CU - -#include "cuPrintf.cuh" -#if __CUDA_ARCH__ > 100 // Atomics only used with > sm_10 architecture -#include -#endif - -// This is the smallest amount of memory, per-thread, which is allowed. -// It is also the largest amount of space a single printf() can take up -const static int CUPRINTF_MAX_LEN = 256; - -// This structure is used internally to track block/thread output restrictions. -typedef struct __align__(8) { - int threadid; // CUPRINTF_UNRESTRICTED for unrestricted - int blockid; // CUPRINTF_UNRESTRICTED for unrestricted -} cuPrintfRestriction; - -// The main storage is in a global print buffer, which has a known -// start/end/length. These are atomically updated so it works as a -// circular buffer. -// Since the only control primitive that can be used is atomicAdd(), -// we cannot wrap the pointer as such. The actual address must be -// calculated from printfBufferPtr by mod-ing with printfBufferLength. -// For sm_10 architecture, we must subdivide the buffer per-thread -// since we do not even have an atomic primitive. -__constant__ static char *globalPrintfBuffer = NULL; // Start of circular buffer (set up by host) -__constant__ static int printfBufferLength = 0; // Size of circular buffer (set up by host) -__device__ static cuPrintfRestriction restrictRules; // Output restrictions -__device__ volatile static char *printfBufferPtr = NULL; // Current atomically-incremented non-wrapped offset - -// This is the header preceeding all printf entries. -// NOTE: It *must* be size-aligned to the maximum entity size (size_t) -typedef struct __align__(8) { - unsigned short magic; // Magic number says we're valid - unsigned short fmtoffset; // Offset of fmt string into buffer - unsigned short blockid; // Block ID of author - unsigned short threadid; // Thread ID of author -} cuPrintfHeader; - -// Special header for sm_10 architecture -#define CUPRINTF_SM10_MAGIC 0xC810 // Not a valid ascii character -typedef struct __align__(16) { - unsigned short magic; // sm_10 specific magic number - unsigned short unused; - unsigned int thread_index; // thread ID for this buffer - unsigned int thread_buf_len; // per-thread buffer length - unsigned int offset; // most recent printf's offset -} cuPrintfHeaderSM10; - - -// Because we can't write an element which is not aligned to its bit-size, -// we have to align all sizes and variables on maximum-size boundaries. -// That means sizeof(double) in this case, but we'll use (long long) for -// better arch<1.3 support -#define CUPRINTF_ALIGN_SIZE sizeof(long long) - -// All our headers are prefixed with a magic number so we know they're ready -#define CUPRINTF_SM11_MAGIC (unsigned short)0xC811 // Not a valid ascii character - - -// -// getNextPrintfBufPtr -// -// Grabs a block of space in the general circular buffer, using an -// atomic function to ensure that it's ours. We handle wrapping -// around the circular buffer and return a pointer to a place which -// can be written to. -// -// Important notes: -// 1. We always grab CUPRINTF_MAX_LEN bytes -// 2. Because of 1, we never worry about wrapping around the end -// 3. Because of 1, printfBufferLength *must* be a factor of CUPRINTF_MAX_LEN -// -// This returns a pointer to the place where we own. -// -__device__ static char *getNextPrintfBufPtr() -{ - // Initialisation check - if(!printfBufferPtr) - return NULL; - - // Thread/block restriction check - if((restrictRules.blockid != CUPRINTF_UNRESTRICTED) && (restrictRules.blockid != (blockIdx.x + gridDim.x*blockIdx.y))) - return NULL; - if((restrictRules.threadid != CUPRINTF_UNRESTRICTED) && (restrictRules.threadid != (threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z))) - return NULL; - - // Conditional section, dependent on architecture -#if __CUDA_ARCH__ == 100 - // For sm_10 architectures, we have no atomic add - this means we must split the - // entire available buffer into per-thread blocks. Inefficient, but what can you do. - int thread_count = (gridDim.x * gridDim.y) * (blockDim.x * blockDim.y * blockDim.z); - int thread_index = threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z + - (blockIdx.x + gridDim.x*blockIdx.y) * (blockDim.x * blockDim.y * blockDim.z); - - // Find our own block of data and go to it. Make sure the per-thread length - // is a precise multiple of CUPRINTF_MAX_LEN, otherwise we risk size and - // alignment issues! We must round down, of course. - unsigned int thread_buf_len = printfBufferLength / thread_count; - thread_buf_len &= ~(CUPRINTF_MAX_LEN-1); - - // We *must* have a thread buffer length able to fit at least two printfs (one header, one real) - if(thread_buf_len < (CUPRINTF_MAX_LEN * 2)) - return NULL; - - // Now address our section of the buffer. The first item is a header. - char *myPrintfBuffer = globalPrintfBuffer + (thread_buf_len * thread_index); - cuPrintfHeaderSM10 hdr = *(cuPrintfHeaderSM10 *)(void *)myPrintfBuffer; - if(hdr.magic != CUPRINTF_SM10_MAGIC) - { - // If our header is not set up, initialise it - hdr.magic = CUPRINTF_SM10_MAGIC; - hdr.thread_index = thread_index; - hdr.thread_buf_len = thread_buf_len; - hdr.offset = 0; // Note we start at 0! We pre-increment below. - *(cuPrintfHeaderSM10 *)(void *)myPrintfBuffer = hdr; // Write back the header - - // For initial setup purposes, we might need to init thread0's header too - // (so that cudaPrintfDisplay() below will work). This is only run once. - cuPrintfHeaderSM10 *tophdr = (cuPrintfHeaderSM10 *)(void *)globalPrintfBuffer; - tophdr->thread_buf_len = thread_buf_len; - } - - // Adjust the offset by the right amount, and wrap it if need be - unsigned int offset = hdr.offset + CUPRINTF_MAX_LEN; - if(offset >= hdr.thread_buf_len) - offset = CUPRINTF_MAX_LEN; - - // Write back the new offset for next time and return a pointer to it - ((cuPrintfHeaderSM10 *)(void *)myPrintfBuffer)->offset = offset; - return myPrintfBuffer + offset; -#else - // Much easier with an atomic operation! - size_t offset = atomicAdd((unsigned int *)&printfBufferPtr, CUPRINTF_MAX_LEN) - (size_t)globalPrintfBuffer; - offset %= printfBufferLength; - return globalPrintfBuffer + offset; -#endif -} - - -// -// writePrintfHeader -// -// Inserts the header for containing our UID, fmt position and -// block/thread number. We generate it dynamically to avoid -// issues arising from requiring pre-initialisation. -// -__device__ static void writePrintfHeader(char *ptr, char *fmtptr) -{ - if(ptr) - { - cuPrintfHeader header; - header.magic = CUPRINTF_SM11_MAGIC; - header.fmtoffset = (unsigned short)(fmtptr - ptr); - header.blockid = blockIdx.x + gridDim.x*blockIdx.y; - header.threadid = threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z; - *(cuPrintfHeader *)(void *)ptr = header; - } -} - - -// -// cuPrintfStrncpy -// -// This special strncpy outputs an aligned length value, followed by the -// string. It then zero-pads the rest of the string until a 64-aligned -// boundary. The length *includes* the padding. A pointer to the byte -// just after the \0 is returned. -// -// This function could overflow CUPRINTF_MAX_LEN characters in our buffer. -// To avoid it, we must count as we output and truncate where necessary. -// -__device__ static char *cuPrintfStrncpy(char *dest, const char *src, int n, char *end) -{ - // Initialisation and overflow check - if(!dest || !src || (dest >= end)) - return NULL; - - // Prepare to write the length specifier. We're guaranteed to have - // at least "CUPRINTF_ALIGN_SIZE" bytes left because we only write out in - // chunks that size, and CUPRINTF_MAX_LEN is aligned with CUPRINTF_ALIGN_SIZE. - int *lenptr = (int *)(void *)dest; - int len = 0; - dest += CUPRINTF_ALIGN_SIZE; - - // Now copy the string - while(n--) - { - if(dest >= end) // Overflow check - break; - - len++; - *dest++ = *src; - if(*src++ == '\0') - break; - } - - // Now write out the padding bytes, and we have our length. - while((dest < end) && (((long)dest & (CUPRINTF_ALIGN_SIZE-1)) != 0)) - { - len++; - *dest++ = 0; - } - *lenptr = len; - return (dest < end) ? dest : NULL; // Overflow means return NULL -} - - -// -// copyArg -// -// This copies a length specifier and then the argument out to the -// data buffer. Templates let the compiler figure all this out at -// compile-time, making life much simpler from the programming -// point of view. I'm assuimg all (const char *) is a string, and -// everything else is the variable it points at. I'd love to see -// a better way of doing it, but aside from parsing the format -// string I can't think of one. -// -// The length of the data type is inserted at the beginning (so that -// the display can distinguish between float and double), and the -// pointer to the end of the entry is returned. -// -__device__ static char *copyArg(char *ptr, const char *arg, char *end) -{ - // Initialisation check - if(!ptr || !arg) - return NULL; - - // strncpy does all our work. We just terminate. - if((ptr = cuPrintfStrncpy(ptr, arg, CUPRINTF_MAX_LEN, end)) != NULL) - *ptr = 0; - - return ptr; -} - -template -__device__ static char *copyArg(char *ptr, T &arg, char *end) -{ - // Initisalisation and overflow check. Alignment rules mean that - // we're at least CUPRINTF_ALIGN_SIZE away from "end", so we only need - // to check that one offset. - if(!ptr || ((ptr+CUPRINTF_ALIGN_SIZE) >= end)) - return NULL; - - // Write the length and argument - *(int *)(void *)ptr = sizeof(arg); - ptr += CUPRINTF_ALIGN_SIZE; - *(T *)(void *)ptr = arg; - ptr += CUPRINTF_ALIGN_SIZE; - *ptr = 0; - - return ptr; -} - - -// -// cuPrintf -// -// Templated printf functions to handle multiple arguments. -// Note we return the total amount of data copied, not the number -// of characters output. But then again, who ever looks at the -// return from printf() anyway? -// -// The format is to grab a block of circular buffer space, the -// start of which will hold a header and a pointer to the format -// string. We then write in all the arguments, and finally the -// format string itself. This is to make it easy to prevent -// overflow of our buffer (we support up to 10 arguments, each of -// which can be 12 bytes in length - that means that only the -// format string (or a %s) can actually overflow; so the overflow -// check need only be in the strcpy function. -// -// The header is written at the very last because that's what -// makes it look like we're done. -// -// Errors, which are basically lack-of-initialisation, are ignored -// in the called functions because NULL pointers are passed around -// - -// All printf variants basically do the same thing, setting up the -// buffer, writing all arguments, then finalising the header. For -// clarity, we'll pack the code into some big macros. -#define CUPRINTF_PREAMBLE \ - char *start, *end, *bufptr, *fmtstart; \ - if((start = getNextPrintfBufPtr()) == NULL) return 0; \ - end = start + CUPRINTF_MAX_LEN; \ - bufptr = start + sizeof(cuPrintfHeader); - -// Posting an argument is easy -#define CUPRINTF_ARG(argname) \ - bufptr = copyArg(bufptr, argname, end); - -// After args are done, record start-of-fmt and write the fmt and header -#define CUPRINTF_POSTAMBLE \ - fmtstart = bufptr; \ - end = cuPrintfStrncpy(bufptr, fmt, CUPRINTF_MAX_LEN, end); \ - writePrintfHeader(start, end ? fmtstart : NULL); \ - return end ? (int)(end - start) : 0; - -__device__ int cuPrintf(const char *fmt) -{ - CUPRINTF_PREAMBLE; - - CUPRINTF_POSTAMBLE; -} -template __device__ int cuPrintf(const char *fmt, T1 arg1) -{ - CUPRINTF_PREAMBLE; - - CUPRINTF_ARG(arg1); - - CUPRINTF_POSTAMBLE; -} -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2) -{ - CUPRINTF_PREAMBLE; - - CUPRINTF_ARG(arg1); - CUPRINTF_ARG(arg2); - - CUPRINTF_POSTAMBLE; -} -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3) -{ - CUPRINTF_PREAMBLE; - - CUPRINTF_ARG(arg1); - CUPRINTF_ARG(arg2); - CUPRINTF_ARG(arg3); - - CUPRINTF_POSTAMBLE; -} -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4) -{ - CUPRINTF_PREAMBLE; - - CUPRINTF_ARG(arg1); - CUPRINTF_ARG(arg2); - CUPRINTF_ARG(arg3); - CUPRINTF_ARG(arg4); - - CUPRINTF_POSTAMBLE; -} -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5) -{ - CUPRINTF_PREAMBLE; - - CUPRINTF_ARG(arg1); - CUPRINTF_ARG(arg2); - CUPRINTF_ARG(arg3); - CUPRINTF_ARG(arg4); - CUPRINTF_ARG(arg5); - - CUPRINTF_POSTAMBLE; -} -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6) -{ - CUPRINTF_PREAMBLE; - - CUPRINTF_ARG(arg1); - CUPRINTF_ARG(arg2); - CUPRINTF_ARG(arg3); - CUPRINTF_ARG(arg4); - CUPRINTF_ARG(arg5); - CUPRINTF_ARG(arg6); - CUPRINTF_POSTAMBLE; -} -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7) -{ - CUPRINTF_PREAMBLE; - - CUPRINTF_ARG(arg1); - CUPRINTF_ARG(arg2); - CUPRINTF_ARG(arg3); - CUPRINTF_ARG(arg4); - CUPRINTF_ARG(arg5); - CUPRINTF_ARG(arg6); - CUPRINTF_ARG(arg7); - - CUPRINTF_POSTAMBLE; -} -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8) -{ - CUPRINTF_PREAMBLE; - - CUPRINTF_ARG(arg1); - CUPRINTF_ARG(arg2); - CUPRINTF_ARG(arg3); - CUPRINTF_ARG(arg4); - CUPRINTF_ARG(arg5); - CUPRINTF_ARG(arg6); - CUPRINTF_ARG(arg7); - CUPRINTF_ARG(arg8); - - CUPRINTF_POSTAMBLE; -} -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9) -{ - CUPRINTF_PREAMBLE; - - CUPRINTF_ARG(arg1); - CUPRINTF_ARG(arg2); - CUPRINTF_ARG(arg3); - CUPRINTF_ARG(arg4); - CUPRINTF_ARG(arg5); - CUPRINTF_ARG(arg6); - CUPRINTF_ARG(arg7); - CUPRINTF_ARG(arg8); - CUPRINTF_ARG(arg9); - - CUPRINTF_POSTAMBLE; -} -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10) -{ - CUPRINTF_PREAMBLE; - - CUPRINTF_ARG(arg1); - CUPRINTF_ARG(arg2); - CUPRINTF_ARG(arg3); - CUPRINTF_ARG(arg4); - CUPRINTF_ARG(arg5); - CUPRINTF_ARG(arg6); - CUPRINTF_ARG(arg7); - CUPRINTF_ARG(arg8); - CUPRINTF_ARG(arg9); - CUPRINTF_ARG(arg10); - - CUPRINTF_POSTAMBLE; -} -#undef CUPRINTF_PREAMBLE -#undef CUPRINTF_ARG -#undef CUPRINTF_POSTAMBLE - - -// -// cuPrintfRestrict -// -// Called to restrict output to a given thread/block. -// We store the info in "restrictRules", which is set up at -// init time by the host. It's not the cleanest way to do this -// because it means restrictions will last between -// invocations, but given the output-pointer continuity, -// I feel this is reasonable. -// -__device__ void cuPrintfRestrict(int threadid, int blockid) -{ - int thread_count = blockDim.x * blockDim.y * blockDim.z; - if(((threadid < thread_count) && (threadid >= 0)) || (threadid == CUPRINTF_UNRESTRICTED)) - restrictRules.threadid = threadid; - - int block_count = gridDim.x * gridDim.y; - if(((blockid < block_count) && (blockid >= 0)) || (blockid == CUPRINTF_UNRESTRICTED)) - restrictRules.blockid = blockid; -} - - -/////////////////////////////////////////////////////////////////////////////// -// HOST SIDE - -#include -static FILE *printf_fp; - -static char *printfbuf_start=NULL; -static char *printfbuf_device=NULL; -static int printfbuf_len=0; - - -// -// outputPrintfData -// -// Our own internal function, which takes a pointer to a data buffer -// and passes it through libc's printf for output. -// -// We receive the formate string and a pointer to where the data is -// held. We then run through and print it out. -// -// Returns 0 on failure, 1 on success -// -static int outputPrintfData(char *fmt, char *data) -{ - // Format string is prefixed by a length that we don't need - fmt += CUPRINTF_ALIGN_SIZE; - - // Now run through it, printing everything we can. We must - // run to every % character, extract only that, and use printf - // to format it. - char *p = strchr(fmt, '%'); - while(p != NULL) - { - // Print up to the % character - *p = '\0'; - fputs(fmt, printf_fp); - *p = '%'; // Put back the % - - // Now handle the format specifier - char *format = p++; // Points to the '%' - p += strcspn(p, "%cdiouxXeEfgGaAnps"); - if(*p == '\0') // If no format specifier, print the whole thing - { - fmt = format; - break; - } - - // Cut out the format bit and use printf to print it. It's prefixed - // by its length. - int arglen = *(int *)data; - if(arglen > CUPRINTF_MAX_LEN) - { - fputs("Corrupt printf buffer data - aborting\n", printf_fp); - return 0; - } - - data += CUPRINTF_ALIGN_SIZE; - - char specifier = *p++; - char c = *p; // Store for later - *p = '\0'; - switch(specifier) - { - // These all take integer arguments - case 'c': - case 'd': - case 'i': - case 'o': - case 'u': - case 'x': - case 'X': - case 'p': - fprintf(printf_fp, format, *((int *)data)); - break; - - // These all take double arguments - case 'e': - case 'E': - case 'f': - case 'g': - case 'G': - case 'a': - case 'A': - if(arglen == 4) // Float vs. Double thing - fprintf(printf_fp, format, *((float *)data)); - else - fprintf(printf_fp, format, *((double *)data)); - break; - - // Strings are handled in a special way - case 's': - fprintf(printf_fp, format, (char *)data); - break; - - // % is special - case '%': - fprintf(printf_fp, "%%"); - break; - - // Everything else is just printed out as-is - default: - fprintf(printf_fp, format); - break; - } - data += CUPRINTF_ALIGN_SIZE; // Move on to next argument - *p = c; // Restore what we removed - fmt = p; // Adjust fmt string to be past the specifier - p = strchr(fmt, '%'); // and get the next specifier - } - - // Print out the last of the string - fputs(fmt, printf_fp); - return 1; -} - - -// -// doPrintfDisplay -// -// This runs through the blocks of CUPRINTF_MAX_LEN-sized data, calling the -// print function above to display them. We've got this separate from -// cudaPrintfDisplay() below so we can handle the SM_10 architecture -// partitioning. -// -static int doPrintfDisplay(int headings, int clear, char *bufstart, char *bufend, char *bufptr, char *endptr) -{ - // Grab, piece-by-piece, each output element until we catch - // up with the circular buffer end pointer - int printf_count=0; - char printfbuf_local[CUPRINTF_MAX_LEN+1]; - printfbuf_local[CUPRINTF_MAX_LEN] = '\0'; - - while(bufptr != endptr) - { - // Wrap ourselves at the end-of-buffer - if(bufptr == bufend) - bufptr = bufstart; - - // Adjust our start pointer to within the circular buffer and copy a block. - cudaMemcpy(printfbuf_local, bufptr, CUPRINTF_MAX_LEN, cudaMemcpyDeviceToHost); - - // If the magic number isn't valid, then this write hasn't gone through - // yet and we'll wait until it does (or we're past the end for non-async printfs). - cuPrintfHeader *hdr = (cuPrintfHeader *)printfbuf_local; - if((hdr->magic != CUPRINTF_SM11_MAGIC) || (hdr->fmtoffset >= CUPRINTF_MAX_LEN)) - { - //fprintf(printf_fp, "Bad magic number in printf header\n"); - break; - } - - // Extract all the info and get this printf done - if(headings) - fprintf(printf_fp, "[%d, %d]: ", hdr->blockid, hdr->threadid); - if(hdr->fmtoffset == 0) - fprintf(printf_fp, "printf buffer overflow\n"); - else if(!outputPrintfData(printfbuf_local+hdr->fmtoffset, printfbuf_local+sizeof(cuPrintfHeader))) - break; - printf_count++; - - // Clear if asked - if(clear) - cudaMemset(bufptr, 0, CUPRINTF_MAX_LEN); - - // Now advance our start location, because we're done, and keep copying - bufptr += CUPRINTF_MAX_LEN; - } - - return printf_count; -} - - -// -// cudaPrintfInit -// -// Takes a buffer length to allocate, creates the memory on the device and -// returns a pointer to it for when a kernel is called. It's up to the caller -// to free it. -// -extern "C" cudaError_t cudaPrintfInit(size_t bufferLen) -{ - // Fix up bufferlen to be a multiple of CUPRINTF_MAX_LEN - bufferLen = (bufferLen < CUPRINTF_MAX_LEN) ? CUPRINTF_MAX_LEN : bufferLen; - if((bufferLen % CUPRINTF_MAX_LEN) > 0) - bufferLen += (CUPRINTF_MAX_LEN - (bufferLen % CUPRINTF_MAX_LEN)); - printfbuf_len = (int)bufferLen; - - // Allocate a print buffer on the device and zero it - if(cudaMalloc((void **)&printfbuf_device, printfbuf_len) != cudaSuccess) - return cudaErrorInitializationError; - cudaMemset(printfbuf_device, 0, printfbuf_len); - printfbuf_start = printfbuf_device; // Where we start reading from - - // No restrictions to begin with - cuPrintfRestriction restrict; - restrict.threadid = restrict.blockid = CUPRINTF_UNRESTRICTED; - cudaMemcpyToSymbol(restrictRules, &restrict, sizeof(restrict)); - - // Initialise the buffer and the respective lengths/pointers. - cudaMemcpyToSymbol(globalPrintfBuffer, &printfbuf_device, sizeof(char *)); - cudaMemcpyToSymbol(printfBufferPtr, &printfbuf_device, sizeof(char *)); - cudaMemcpyToSymbol(printfBufferLength, &printfbuf_len, sizeof(printfbuf_len)); - - return cudaSuccess; -} - - -// -// cudaPrintfEnd -// -// Frees up the memory which we allocated -// -extern "C" void cudaPrintfEnd() -{ - if(!printfbuf_start || !printfbuf_device) - return; - - cudaFree(printfbuf_device); - printfbuf_start = printfbuf_device = NULL; -} - - -// -// cudaPrintfDisplay -// -// Each call to this function dumps the entire current contents -// of the printf buffer to the pre-specified FILE pointer. The -// circular "start" pointer is advanced so that subsequent calls -// dumps only new stuff. -// -// In the case of async memory access (via streams), call this -// repeatedly to keep trying to empty the buffer. If it's a sync -// access, then the whole buffer should empty in one go. -// -// Arguments: -// outputFP - File descriptor to output to (NULL => stdout) -// showThreadID - If true, prints [block,thread] before each line -// -extern "C" cudaError_t cudaPrintfDisplay(void *outputFP, bool showThreadID) -{ - printf_fp = (FILE *)((outputFP == NULL) ? stdout : outputFP); - - // For now, we force "synchronous" mode which means we're not concurrent - // with kernel execution. This also means we don't need clearOnPrint. - // If you're patching it for async operation, here's where you want it. - bool sync_printfs = true; - bool clearOnPrint = false; - - // Initialisation check - if(!printfbuf_start || !printfbuf_device || !printf_fp) - return cudaErrorMissingConfiguration; - - // To determine which architecture we're using, we read the - // first short from the buffer - it'll be the magic number - // relating to the version. - unsigned short magic; - cudaMemcpy(&magic, printfbuf_device, sizeof(unsigned short), cudaMemcpyDeviceToHost); - - // For SM_10 architecture, we've split our buffer into one-per-thread. - // That means we must do each thread block separately. It'll require - // extra reading. We also, for now, don't support async printfs because - // that requires tracking one start pointer per thread. - if(magic == CUPRINTF_SM10_MAGIC) - { - sync_printfs = true; - clearOnPrint = false; - int blocklen = 0; - char *blockptr = printfbuf_device; - while(blockptr < (printfbuf_device + printfbuf_len)) - { - cuPrintfHeaderSM10 hdr; - cudaMemcpy(&hdr, blockptr, sizeof(hdr), cudaMemcpyDeviceToHost); - - // We get our block-size-step from the very first header - if(hdr.thread_buf_len != 0) - blocklen = hdr.thread_buf_len; - - // No magic number means no printfs from this thread - if(hdr.magic != CUPRINTF_SM10_MAGIC) - { - if(blocklen == 0) - { - fprintf(printf_fp, "No printf headers found at all!\n"); - break; // No valid headers! - } - blockptr += blocklen; - continue; - } - - // "offset" is non-zero then we can print the block contents - if(hdr.offset > 0) - { - // For synchronous printfs, we must print from endptr->bufend, then from start->end - if(sync_printfs) - doPrintfDisplay(showThreadID, clearOnPrint, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len, blockptr+hdr.offset+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len); - doPrintfDisplay(showThreadID, clearOnPrint, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.offset+CUPRINTF_MAX_LEN); - } - - // Move on to the next block and loop again - blockptr += hdr.thread_buf_len; - } - } - // For SM_11 and up, everything is a single buffer and it's simple - else if(magic == CUPRINTF_SM11_MAGIC) - { - // Grab the current "end of circular buffer" pointer. - char *printfbuf_end = NULL; - cudaMemcpyFromSymbol(&printfbuf_end, printfBufferPtr, sizeof(char *)); - - // Adjust our starting and ending pointers to within the block - char *bufptr = ((printfbuf_start - printfbuf_device) % printfbuf_len) + printfbuf_device; - char *endptr = ((printfbuf_end - printfbuf_device) % printfbuf_len) + printfbuf_device; - - // For synchronous (i.e. after-kernel-exit) printf display, we have to handle circular - // buffer wrap carefully because we could miss those past "end". - if(sync_printfs) - doPrintfDisplay(showThreadID, clearOnPrint, printfbuf_device, printfbuf_device+printfbuf_len, endptr, printfbuf_device+printfbuf_len); - doPrintfDisplay(showThreadID, clearOnPrint, printfbuf_device, printfbuf_device+printfbuf_len, bufptr, endptr); - - printfbuf_start = printfbuf_end; - } - else - ;//printf("Bad magic number in cuPrintf buffer header\n"); - - // If we were synchronous, then we must ensure that the memory is cleared on exit - // otherwise another kernel launch with a different grid size could conflict. - if(sync_printfs) - cudaMemset(printfbuf_device, 0, printfbuf_len); - - return cudaSuccess; -} - -// Cleanup -#undef CUPRINTF_MAX_LEN -#undef CUPRINTF_ALIGN_SIZE -#undef CUPRINTF_SM10_MAGIC -#undef CUPRINTF_SM11_MAGIC - -#endif diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh deleted file mode 100644 index cf3fe4868..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh +++ /dev/null @@ -1,162 +0,0 @@ -/* - Copyright 2009 NVIDIA Corporation. All rights reserved. - - NOTICE TO LICENSEE: - - This source code and/or documentation ("Licensed Deliverables") are subject - to NVIDIA intellectual property rights under U.S. and international Copyright - laws. - - These Licensed Deliverables contained herein is PROPRIETARY and CONFIDENTIAL - to NVIDIA and is being provided under the terms and conditions of a form of - NVIDIA software license agreement by and between NVIDIA and Licensee ("License - Agreement") or electronically accepted by Licensee. Notwithstanding any terms - or conditions to the contrary in the License Agreement, reproduction or - disclosure of the Licensed Deliverables to any third party without the express - written consent of NVIDIA is prohibited. - - NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, - NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THESE LICENSED - DELIVERABLES FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED - WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE - LICENSED DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. NOTWITHSTANDING ANY - TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, IN NO EVENT SHALL - NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, - OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER - IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THESE LICENSED DELIVERABLES. - - U.S. Government End Users. These Licensed Deliverables are a "commercial item" - as that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of - "commercial computer software" and "commercial computer software documentation" - as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the - U.S. Government only as a commercial end item. Consistent with 48 C.F.R.12.212 - and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all U.S. Government - End Users acquire the Licensed Deliverables with only those rights set forth - herein. - - Any use of the Licensed Deliverables in individual and commercial software must - include, in the user documentation and internal comments to the code, the above - Disclaimer and U.S. Government End Users Notice. - */ - -#ifndef CUPRINTF_H -#define CUPRINTF_H - -/* - * This is the header file supporting cuPrintf.cu and defining both - * the host and device-side interfaces. See that file for some more - * explanation and sample use code. See also below for details of the - * host-side interfaces. - * - * Quick sample code: - * - #include "cuPrintf.cu" - - __global__ void testKernel(int val) - { - cuPrintf("Value is: %d\n", val); - } - - int main() - { - cudaPrintfInit(); - testKernel<<< 2, 3 >>>(10); - cudaPrintfDisplay(stdout, true); - cudaPrintfEnd(); - return 0; - } - */ - -/////////////////////////////////////////////////////////////////////////////// -// DEVICE SIDE -// External function definitions for device-side code - -// Abuse of templates to simulate varargs -__device__ int cuPrintf(const char *fmt); -template __device__ int cuPrintf(const char *fmt, T1 arg1); -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2); -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3); -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4); -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5); -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6); -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7); -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8); -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9); -template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10); - - -// -// cuPrintfRestrict -// -// Called to restrict output to a given thread/block. Pass -// the constant CUPRINTF_UNRESTRICTED to unrestrict output -// for thread/block IDs. Note you can therefore allow -// "all printfs from block 3" or "printfs from thread 2 -// on all blocks", or "printfs only from block 1, thread 5". -// -// Arguments: -// threadid - Thread ID to allow printfs from -// blockid - Block ID to allow printfs from -// -// NOTE: Restrictions last between invocations of -// kernels unless cudaPrintfInit() is called again. -// -#define CUPRINTF_UNRESTRICTED -1 -__device__ void cuPrintfRestrict(int threadid, int blockid); - - - -/////////////////////////////////////////////////////////////////////////////// -// HOST SIDE -// External function definitions for host-side code - -// -// cudaPrintfInit -// -// Call this once to initialise the printf system. If the output -// file or buffer size needs to be changed, call cudaPrintfEnd() -// before re-calling cudaPrintfInit(). -// -// The default size for the buffer is 1 megabyte. For CUDA -// architecture 1.1 and above, the buffer is filled linearly and -// is completely used; however for architecture 1.0, the buffer -// is divided into as many segments are there are threads, even -// if some threads do not call cuPrintf(). -// -// Arguments: -// bufferLen - Length, in bytes, of total space to reserve -// (in device global memory) for output. -// -// Returns: -// cudaSuccess if all is well. -// -extern "C" cudaError_t cudaPrintfInit(size_t bufferLen=1048576); // 1-meg - that's enough for 4096 printfs by all threads put together - -// -// cudaPrintfEnd -// -// Cleans up all memories allocated by cudaPrintfInit(). -// Call this at exit, or before calling cudaPrintfInit() again. -// -extern "C" void cudaPrintfEnd(); - -// -// cudaPrintfDisplay -// -// Dumps the contents of the output buffer to the specified -// file pointer. If the output pointer is not specified, -// the default "stdout" is used. -// -// Arguments: -// outputFP - A file pointer to an output stream. -// showThreadID - If "true", output strings are prefixed -// by "[blockid, threadid] " at output. -// -// Returns: -// cudaSuccess if all is well. -// -extern "C" cudaError_t cudaPrintfDisplay(void *outputFP=NULL, bool showThreadID=false); - -#endif // CUPRINTF_H diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu deleted file mode 100644 index 3e53c8853..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu +++ /dev/null @@ -1,27 +0,0 @@ -#include "cuPrintf.cu" -#include - -__global__ void device_greetings(void) -{ - cuPrintf("Hello, world from the device!\n"); -} - -int main(void) -{ - // greet from the host - printf("Hello, world from the host!\n"); - - // initialize cuPrintf - cudaPrintfInit(); - - // launch a kernel with a single thread to greet from the device - device_greetings<<<1,1>>>(); - - // display the device's greeting - cudaPrintfDisplay(); - - // clean up after cuPrintf - cudaPrintfEnd(); - - return 0; -} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt deleted file mode 100644 index bef63ef0a..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -set(LIBRARY ethash) -set(CMAKE_BUILD_TYPE Release) - -if (NOT MSVC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99") -endif() - -set(FILES util.c - util.h - internal.c - ethash.h - endian.h - compiler.h - fnv.h - data_sizes.h) - -if (NOT CRYPTOPP_FOUND) - find_package(CryptoPP 5.6.2) -endif() - -if (CRYPTOPP_FOUND) - add_definitions(-DWITH_CRYPTOPP) - include_directories( ${CRYPTOPP_INCLUDE_DIRS} ) - list(APPEND FILES sha3_cryptopp.cpp sha3_cryptopp.h) -else() - list(APPEND FILES sha3.c sha3.h) -endif() - -add_library(${LIBRARY} ${FILES}) - -if (CRYPTOPP_FOUND) - TARGET_LINK_LIBRARIES(${LIBRARY} ${CRYPTOPP_LIBRARIES}) -endif() \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h deleted file mode 100644 index 9695871cd..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - This file is part of cpp-ethereum. - - cpp-ethereum is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - cpp-ethereum is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with cpp-ethereum. If not, see . -*/ -/** @file compiler.h - * @date 2014 - */ -#pragma once - -// Visual Studio doesn't support the inline keyword in C mode -#if defined(_MSC_VER) && !defined(__cplusplus) -#define inline __inline -#endif - -// pretend restrict is a standard keyword -#if defined(_MSC_VER) -#define restrict __restrict -#else -#define restrict __restrict__ -#endif - diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h deleted file mode 100644 index 40417cb71..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h +++ /dev/null @@ -1,248 +0,0 @@ -/* - This file is part of cpp-ethereum. - - cpp-ethereum is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software FoundationUUU,either version 3 of the LicenseUUU,or - (at your option) any later version. - - cpp-ethereum is distributed in the hope that it will be usefulU, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with cpp-ethereum. If notUUU,see . -*/ - -/** @file nth_prime.h -* @author Matthew Wampler-Doty -* @date 2015 -*/ - -// TODO: Update this after ~7 years - -#pragma once - -#include -//#include -#include "compiler.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -// 500 Epochs worth of tabulated DAG sizes (~3.5 Years) - -// Generated with the following Mathematica Code: -// GetDataSizes[n_] := Module[{ -// DAGSizeBytesInit = 2^30, -// MixBytes = 128, -// DAGGrowth = 113000000, -// j = 0}, -// Reap[ -// While[j < n, -// Module[{i = -// Floor[(DAGSizeBytesInit + DAGGrowth * j) / MixBytes]}, -// While[! PrimeQ[i], i--]; -// Sow[i*MixBytes]; j++]]]][[2]][[1]] - -static const size_t dag_sizes[] = { - 1073739904U, 1186739584U, 1299741568U, 1412741248U, 1525741696U, - 1638736768U, 1751741312U, 1864740736U, 1977740672U, 2090740864U, - 2203740544U, 2316741248U, 2429739392U, 2542740352U, 2655741824U, - 2768739712U, 2881740416U, 2994741632U, 3107740544U, 3220741504U, - 3333738112U, 3446741632U, 3559741312U, 3672740224U, 3785740928U, - 3898738304U, 4011741824U, 4124739712U, 4237735808U, 4350740864U, - 4463741824U, 4576741504U, 4689741184U, 4802739328U, 4915741568U, - 5028740224U, 5141740672U, 5254738304U, 5367741824U, 5480737664U, - 5593738112U, 5706741632U, 5819740544U, 5932734592U, 6045739904U, - 6158740096U, 6271740032U, 6384731776U, 6497732992U, 6610740352U, - 6723741056U, 6836741504U, 6949740416U, 7062740096U, 7175741824U, - 7288740224U, 7401741184U, 7514741632U, 7627741568U, 7740739712U, - 7853739136U, 7966740352U, 8079741568U, 8192739712U, 8305738624U, - 8418740864U, 8531740288U, 8644740736U, 8757735808U, 8870738816U, - 8983739264U, 9096740992U, 9209740928U, 9322739584U, 9435741824U, - 9548741504U, 9661739392U, 9774738304U, 9887741312U, 10000738688U, - 10113739136U, 10226741632U, 10339739776U, 10452741248U, 10565740928U, - 10678736512U, 10791734656U, 10904741248U, 11017738112U, 11130741632U, - 11243741312U, 11356739456U, 11469740416U, 11582734976U, 11695739008U, - 11808741248U, 11921734784U, 12034739072U, 12147741568U, 12260737408U, - 12373741696U, 12486738304U, 12599740544U, 12712740224U, 12825741184U, - 12938736256U, 13051741312U, 13164737408U, 13277738368U, 13390738048U, - 13503741824U, 13616741504U, 13729737088U, 13842740096U, 13955741312U, - 14068741504U, 14181740416U, 14294741632U, 14407739776U, 14520740224U, - 14633740928U, 14746736512U, 14859741824U, 14972740736U, 15085740928U, - 15198738304U, 15311732096U, 15424740736U, 15537739904U, 15650741632U, - 15763741568U, 15876737152U, 15989741696U, 16102740608U, 16215741056U, - 16328741248U, 16441740416U, 16554737792U, 16667740288U, 16780740992U, - 16893738112U, 17006741632U, 17119739008U, 17232735616U, 17345739392U, - 17458740352U, 17571736192U, 17684739712U, 17797739392U, 17910740096U, - 18023741312U, 18136740736U, 18249738112U, 18362738816U, 18475735424U, - 18588740224U, 18701738368U, 18814736768U, 18927737216U, 19040739968U, - 19153739648U, 19266736768U, 19379737984U, 19492739456U, 19605738368U, - 19718740352U, 19831741312U, 19944736384U, 20057741696U, 20170741376U, - 20283741824U, 20396737408U, 20509741696U, 20622741376U, 20735739008U, - 20848741504U, 20961740672U, 21074739328U, 21187740032U, 21300739456U, - 21413741696U, 21526740608U, 21639741824U, 21752737408U, 21865741696U, - 21978741376U, 22091741824U, 22204738432U, 22317740672U, 22430740096U, - 22543736704U, 22656741248U, 22769739904U, 22882739584U, 22995740288U, - 23108740736U, 23221740928U, 23334741376U, 23447737216U, 23560740992U, - 23673741184U, 23786740864U, 23899737728U, 24012741248U, 24125734784U, - 24238736512U, 24351741824U, 24464740736U, 24577737088U, 24690741632U, - 24803739776U, 24916740736U, 25029740416U, 25142740864U, 25255741568U, - 25368741248U, 25481740672U, 25594741376U, 25707741568U, 25820741504U, - 25933730432U, 26046739072U, 26159741824U, 26272741504U, 26385740672U, - 26498740096U, 26611741568U, 26724740992U, 26837739904U, 26950735232U, - 27063738496U, 27176741248U, 27289741184U, 27402740864U, 27515740544U, - 27628737152U, 27741740672U, 27854741632U, 27967740544U, 28080739712U, - 28193738368U, 28306741376U, 28419737728U, 28532739968U, 28645739648U, - 28758740096U, 28871741312U, 28984739456U, 29097740416U, 29210740864U, - 29323741312U, 29436740224U, 29549741696U, 29662738304U, 29775741568U, - 29888741504U, 30001740928U, 30114737024U, 30227735168U, 30340737664U, - 30453738368U, 30566737024U, 30679733632U, 30792740224U, 30905740928U, - 31018740352U, 31131740032U, 31244738944U, 31357737344U, 31470741376U, - 31583740544U, 31696740224U, 31809738112U, 31922739328U, 32035737472U, - 32148740992U, 32261741696U, 32374740352U, 32487741824U, 32600740736U, - 32713739648U, 32826740608U, 32939729792U, 33052740992U, 33165740672U, - 33278739584U, 33391741312U, 33504739712U, 33617740928U, 33730740608U, - 33843738496U, 33956739968U, 34069741696U, 34182739328U, 34295741824U, - 34408739968U, 34521740672U, 34634736512U, 34747741568U, 34860741248U, - 34973739392U, 35086738304U, 35199741056U, 35312736896U, 35425741184U, - 35538741376U, 35651740288U, 35764737152U, 35877741184U, 35990739584U, - 36103740544U, 36216740992U, 36329739392U, 36442737536U, 36555741568U, - 36668740736U, 36781741184U, 36894737024U, 37007741312U, 37120739456U, - 37233741184U, 37346736256U, 37459736192U, 37572734336U, 37685739904U, - 37798740352U, 37911737728U, 38024741504U, 38137739648U, 38250740608U, - 38363741824U, 38476740992U, 38589741184U, 38702740096U, 38815741312U, - 38928741248U, 39041738368U, 39154739584U, 39267741824U, 39380739712U, - 39493735808U, 39606741632U, 39719741312U, 39832741504U, 39945739648U, - 40058740352U, 40171740032U, 40284740992U, 40397740672U, 40510740352U, - 40623740288U, 40736738176U, 40849737856U, 40962741376U, 41075739776U, - 41188737664U, 41301735808U, 41414738048U, 41527741312U, 41640740992U, - 41753739904U, 41866739072U, 41979738496U, 42092740736U, 42205739648U, - 42318740608U, 42431741312U, 42544738688U, 42657741184U, 42770738048U, - 42883741568U, 42996741248U, 43109740928U, 43222736512U, 43335741056U, - 43448730496U, 43561740416U, 43674741632U, 43787740544U, 43900741504U, - 44013739648U, 44126740864U, 44239740544U, 44352741248U, 44465738368U, - 44578735232U, 44691739264U, 44804741504U, 44917741696U, 45030741376U, - 45143741824U, 45256740992U, 45369739136U, 45482740096U, 45595739776U, - 45708739712U, 45821740672U, 45934741376U, 46047741056U, 46160741248U, - 46273737088U, 46386740864U, 46499739008U, 46612739968U, 46725735296U, - 46838740864U, 46951741568U, 47064737152U, 47177741696U, 47290741376U, - 47403738752U, 47516741248U, 47629739648U, 47742741632U, 47855737984U, - 47968740224U, 48081738368U, 48194741632U, 48307739264U, 48420739712U, - 48533739136U, 48646738304U, 48759741824U, 48872741504U, 48985739392U, - 49098741376U, 49211741056U, 49324740992U, 49437738368U, 49550740864U, - 49663735424U, 49776737408U, 49889740672U, 50002738816U, 50115738752U, - 50228739712U, 50341741696U, 50454736768U, 50567738752U, 50680739968U, - 50793736832U, 50906734976U, 51019741568U, 51132739456U, 51245741696U, - 51358741376U, 51471741056U, 51584738944U, 51697734272U, 51810739072U, - 51923736448U, 52036740736U, 52149741184U, 52262737024U, 52375738496U, - 52488740992U, 52601739136U, 52714740352U, 52827736448U, 52940738176U, - 53053741696U, 53166740864U, 53279741824U, 53392741504U, 53505739136U, - 53618739584U, 53731741312U, 53844741248U, 53957741696U, 54070741376U, - 54183740288U, 54296741504U, 54409741696U, 54522739072U, 54635737472U, - 54748741504U, 54861736064U, 54974740096U, 55087741568U, 55200733568U, - 55313741696U, 55426734464U, 55539741056U, 55652741504U, 55765741184U, - 55878741376U, 55991730304U, 56104740992U, 56217740672U, 56330731648U, - 56443737472U, 56556724352U, 56669740672U, 56782739072U, 56895740032U, - 57008741248U, 57121741696U, 57234740096U, 57347741312U, 57460741504U -}; - -// 500 Epochs worth of tabulated DAG sizes (~3.5 Years) - -// Generated with the following Mathematica Code: -// GetCacheSizes[n_] := Module[{ -// DAGSizeBytesInit = 2^30, -// MixBytes = 128, -// DAGGrowth = 113000000, -// HashBytes = 64, -// DAGParents = 1024, -// j = 0}, -// Reap[ -// While[j < n, -// Module[{i = Floor[(DAGSizeBytesInit + DAGGrowth * j) / (DAGParents * HashBytes)]}, -// While[! PrimeQ[i], i--]; -// Sow[i*HashBytes]; j++]]]][[2]][[1]] - -const size_t cache_sizes[] = { - 1048384U, 1158208U, 1268416U, 1377856U, 1489856U, 1599296U, 1710656U, - 1820608U, 1930816U, 2041024U, 2151872U, 2261696U, 2371904U, 2482624U, - 2593216U, 2703296U, 2814016U, 2924224U, 3034816U, 3144896U, 3255488U, - 3365312U, 3475904U, 3586624U, 3696064U, 3806272U, 3917504U, 4027456U, - 4138304U, 4248512U, 4359104U, 4469312U, 4579264U, 4689728U, 4797376U, - 4909888U, 5020096U, 5131328U, 5241664U, 5351744U, 5461312U, 5572544U, - 5683264U, 5793472U, 5903552U, 6014144U, 6121664U, 6235072U, 6344896U, - 6454592U, 6565952U, 6675904U, 6786112U, 6896704U, 7006784U, 7117888U, - 7228096U, 7338304U, 7448768U, 7557952U, 7669184U, 7779776U, 7889216U, - 8000192U, 8110912U, 8220736U, 8331712U, 8441536U, 8552384U, 8662592U, - 8772928U, 8883136U, 8993728U, 9103168U, 9214528U, 9323968U, 9434816U, - 9545152U, 9655616U, 9766336U, 9876544U, 9986624U, 10097344U, 10207424U, - 10316864U, 10427968U, 10538432U, 10649152U, 10758976U, 10869568U, 10979776U, - 11089472U, 11200832U, 11309632U, 11420608U, 11531584U, 11641792U, 11751104U, - 11862976U, 11973184U, 12083264U, 12193856U, 12304064U, 12414656U, 12524608U, - 12635072U, 12745792U, 12855616U, 12965824U, 13076416U, 13187008U, 13297216U, - 13407808U, 13518016U, 13627072U, 13738688U, 13848256U, 13959488U, 14069696U, - 14180288U, 14290624U, 14399552U, 14511424U, 14621504U, 14732096U, 14841664U, - 14951744U, 15062336U, 15172672U, 15283264U, 15393088U, 15504448U, 15614272U, - 15723712U, 15834944U, 15945152U, 16055744U, 16165696U, 16277056U, 16387136U, - 16494784U, 16607936U, 16718272U, 16828736U, 16938176U, 17048384U, 17159872U, - 17266624U, 17380544U, 17490496U, 17600192U, 17711296U, 17821376U, 17931968U, - 18041152U, 18152896U, 18261952U, 18373568U, 18483392U, 18594112U, 18703936U, - 18814912U, 18924992U, 19034944U, 19145408U, 19256128U, 19366208U, 19477184U, - 19587136U, 19696576U, 19808192U, 19916992U, 20028352U, 20137664U, 20249024U, - 20358848U, 20470336U, 20580544U, 20689472U, 20801344U, 20911424U, 21020096U, - 21130688U, 21242176U, 21352384U, 21462208U, 21573824U, 21683392U, 21794624U, - 21904448U, 22013632U, 22125248U, 22235968U, 22344512U, 22456768U, 22566848U, - 22677056U, 22786496U, 22897984U, 23008064U, 23118272U, 23228992U, 23338816U, - 23449408U, 23560256U, 23670464U, 23780672U, 23891264U, 24001216U, 24110656U, - 24221888U, 24332608U, 24442688U, 24552512U, 24662464U, 24773696U, 24884032U, - 24994496U, 25105216U, 25215296U, 25324864U, 25435712U, 25546432U, 25655744U, - 25767232U, 25876672U, 25986368U, 26098112U, 26207936U, 26318912U, 26428736U, - 26539712U, 26650048U, 26760256U, 26869184U, 26979776U, 27091136U, 27201728U, - 27311552U, 27422272U, 27532352U, 27642304U, 27752896U, 27863744U, 27973952U, - 28082752U, 28194752U, 28305344U, 28415168U, 28524992U, 28636352U, 28746304U, - 28857152U, 28967104U, 29077184U, 29187904U, 29298496U, 29408576U, 29518912U, - 29628992U, 29739968U, 29850176U, 29960512U, 30070336U, 30180544U, 30290752U, - 30398912U, 30512192U, 30622784U, 30732992U, 30842176U, 30953536U, 31063744U, - 31174336U, 31284544U, 31395136U, 31504448U, 31615552U, 31725632U, 31835072U, - 31946176U, 32057024U, 32167232U, 32277568U, 32387008U, 32497984U, 32608832U, - 32719168U, 32829376U, 32939584U, 33050048U, 33160768U, 33271232U, 33381184U, - 33491648U, 33601856U, 33712576U, 33822016U, 33932992U, 34042816U, 34153024U, - 34263104U, 34373824U, 34485056U, 34594624U, 34704832U, 34816064U, 34926272U, - 35036224U, 35146816U, 35255104U, 35367104U, 35478208U, 35588416U, 35698496U, - 35808832U, 35918656U, 36029888U, 36139456U, 36250688U, 36360512U, 36471104U, - 36581696U, 36691136U, 36802112U, 36912448U, 37022912U, 37132864U, 37242944U, - 37354048U, 37464512U, 37574848U, 37684928U, 37794752U, 37904704U, 38015552U, - 38125888U, 38236864U, 38345792U, 38457152U, 38567744U, 38678336U, 38787776U, - 38897216U, 39009088U, 39117632U, 39230144U, 39340352U, 39450304U, 39560384U, - 39671488U, 39781312U, 39891392U, 40002112U, 40112704U, 40223168U, 40332608U, - 40443968U, 40553792U, 40664768U, 40774208U, 40884416U, 40993984U, 41105984U, - 41215424U, 41326528U, 41436992U, 41546048U, 41655872U, 41768128U, 41878336U, - 41988928U, 42098752U, 42209344U, 42319168U, 42429248U, 42540352U, 42649792U, - 42761024U, 42871616U, 42981824U, 43092032U, 43201856U, 43312832U, 43423552U, - 43533632U, 43643584U, 43753792U, 43864384U, 43974976U, 44084032U, 44195392U, - 44306368U, 44415296U, 44526016U, 44637248U, 44746816U, 44858048U, 44967872U, - 45078848U, 45188288U, 45299264U, 45409216U, 45518272U, 45630272U, 45740224U, - 45850432U, 45960896U, 46069696U, 46182208U, 46292416U, 46402624U, 46512064U, - 46623296U, 46733888U, 46843712U, 46953664U, 47065024U, 47175104U, 47285696U, - 47395904U, 47506496U, 47615296U, 47726912U, 47837632U, 47947712U, 48055232U, - 48168128U, 48277952U, 48387392U, 48499648U, 48609472U, 48720064U, 48830272U, - 48940096U, 49050944U, 49160896U, 49271744U, 49381568U, 49492288U, 49602752U, - 49712576U, 49822016U, 49934272U, 50042816U, 50154304U, 50264128U, 50374336U, - 50484416U, 50596288U, 50706752U, 50816704U, 50927168U, 51035456U, 51146944U, - 51258176U, 51366976U, 51477824U, 51589568U, 51699776U, 51809728U, 51920576U, - 52030016U, 52140736U, 52251328U, 52361152U, 52470592U, 52582592U, 52691776U, - 52803136U, 52912576U, 53020736U, 53132224U, 53242688U, 53354816U, 53465536U, - 53575232U, 53685568U, 53796544U, 53906752U, 54016832U, 54126656U, 54236992U, - 54347456U, 54457408U, 54569024U, 54679232U, 54789184U, 54899776U, 55008832U, - 55119296U, 55231168U, 55341248U, 55451584U, 55562048U, 55672256U, 55782208U, - 55893184U, 56002112U, 56113216U -}; - -#ifdef __cplusplus -} -#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h deleted file mode 100644 index 9ca842e47..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#include -#include "compiler.h" - -static const uint8_t BitReverseTable256[] = - { - 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, - 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, - 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, - 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, - 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, - 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, - 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, - 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, - 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, - 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, - 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, - 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, - 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, - 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, - 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, - 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF - }; - -static inline uint32_t bitfn_swap32(uint32_t a) { - return (BitReverseTable256[a & 0xff] << 24) | - (BitReverseTable256[(a >> 8) & 0xff] << 16) | - (BitReverseTable256[(a >> 16) & 0xff] << 8) | - (BitReverseTable256[(a >> 24) & 0xff]); -} - -static inline uint64_t bitfn_swap64(uint64_t a) { - return ((uint64_t) bitfn_swap32((uint32_t) (a >> 32))) | - (((uint64_t) bitfn_swap32((uint32_t) a)) << 32); -} - -#if defined(__MINGW32__) || defined(_WIN32) - # define LITTLE_ENDIAN 1234 - # define BYTE_ORDER LITTLE_ENDIAN -#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) - # include -#elif defined(__OpenBSD__) || defined(__SVR4) - # include -#elif defined(__APPLE__) -# include -#elif defined( BSD ) && (BSD >= 199103) - # include -#elif defined( __QNXNTO__ ) && defined( __LITTLEENDIAN__ ) - # define LITTLE_ENDIAN 1234 - # define BYTE_ORDER LITTLE_ENDIAN -#elif defined( __QNXNTO__ ) && defined( __BIGENDIAN__ ) - # define BIG_ENDIAN 1234 - # define BYTE_ORDER BIG_ENDIAN -#else - -# include - -#endif - - -#if LITTLE_ENDIAN == BYTE_ORDER - -#define fix_endian32(x) (x) -#define fix_endian64(x) (x) - -#elif BIG_ENDIAN == BYTE_ORDER - -#define fix_endian32(x) bitfn_swap32(x) -#define fix_endian64(x) bitfn_swap64(x) - -#else -# error "endian not supported" -#endif // BYTE_ORDER \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h deleted file mode 100644 index 62edd0082..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - This file is part of cpp-ethereum. - - cpp-ethereum is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - cpp-ethereum is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with cpp-ethereum. If not, see . -*/ -/** @file ethash.h -* @date 2015 -*/ -#pragma once - -#include -#include -#include -#include -#include "compiler.h" - -#define REVISION 18 -#define DAGSIZE_BYTES_INIT 1073741824U -#define DAG_GROWTH 113000000U -#define EPOCH_LENGTH 30000U -#define MIX_BYTES 128 -#define DAG_PARENTS 256 -#define CACHE_ROUNDS 3 -#define ACCESSES 64 - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct ethash_params { - size_t full_size; // Size of full data set (in bytes, multiple of mix size (128)). - size_t cache_size; // Size of compute cache (in bytes, multiple of node size (64)). -} ethash_params; - -typedef struct ethash_return_value { - uint8_t result[32]; - uint8_t mix_hash[32]; -} ethash_return_value; - -size_t const ethash_get_datasize(const uint32_t block_number); -size_t const ethash_get_cachesize(const uint32_t block_number); - -// initialize the parameters -static inline void ethash_params_init(ethash_params *params, const uint32_t block_number) { - params->full_size = ethash_get_datasize(block_number); - params->cache_size = ethash_get_cachesize(block_number); -} - -typedef struct ethash_cache { - void *mem; -} ethash_cache; - -void ethash_mkcache(ethash_cache *cache, ethash_params const *params, const uint8_t seed[32]); -void ethash_compute_full_data(void *mem, ethash_params const *params, ethash_cache const *cache); -void ethash_full(ethash_return_value *ret, void const *full_mem, ethash_params const *params, const uint8_t header_hash[32], const uint64_t nonce); -void ethash_light(ethash_return_value *ret, ethash_cache const *cache, ethash_params const *params, const uint8_t header_hash[32], const uint64_t nonce); - -static inline int ethash_check_difficulty( - const uint8_t hash[32], - const uint8_t difficulty[32]) { - // Difficulty is big endian - for (int i = 0; i < 32; i++) { - if (hash[i] == difficulty[i]) continue; - return hash[i] < difficulty[i]; - } - return 0; -} - -int ethash_quick_check_difficulty( - const uint8_t header_hash[32], - const uint64_t nonce, - const uint8_t mix_hash[32], - const uint8_t difficulty[32]); - -#ifdef __cplusplus -} -#endif diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h deleted file mode 100644 index edabeaae2..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - This file is part of cpp-ethereum. - - cpp-ethereum is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - cpp-ethereum is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with cpp-ethereum. If not, see . -*/ -/** @file fnv.h -* @author Matthew Wampler-Doty -* @date 2015 -*/ - -#pragma once -#include -#include "compiler.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define FNV_PRIME 0x01000193 - -static inline uint32_t fnv_hash(const uint32_t x, const uint32_t y) { - return x*FNV_PRIME ^ y; -} - -#ifdef __cplusplus -} -#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c deleted file mode 100644 index cc48717d0..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c +++ /dev/null @@ -1,297 +0,0 @@ -/* - This file is part of cpp-ethereum. - - cpp-ethereum is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - cpp-ethereum is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with cpp-ethereum. If not, see . -*/ -/** @file dash.cpp -* @author Tim Hughes -* @author Matthew Wampler-Doty -* @date 2015 -*/ - -#include -#include -#include -#include "ethash.h" -#include "fnv.h" -#include "endian.h" -#include "internal.h" -#include "data_sizes.h" - -#ifdef WITH_CRYPTOPP - -#include "SHA3_cryptopp.h" - -#else -#include "sha3.h" -#endif // WITH_CRYPTOPP - -size_t const ethash_get_datasize(const uint32_t block_number) { - assert(block_number / EPOCH_LENGTH < 500); - return dag_sizes[block_number / EPOCH_LENGTH]; -} - -size_t const ethash_get_cachesize(const uint32_t block_number) { - assert(block_number / EPOCH_LENGTH < 500); - return cache_sizes[block_number / EPOCH_LENGTH]; -} - -// Follows Sergio's "STRICT MEMORY HARD HASHING FUNCTIONS" (2014) -// https://bitslog.files.wordpress.com/2013/12/memohash-v0-3.pdf -// SeqMemoHash(s, R, N) -void static ethash_compute_cache_nodes( - node *const nodes, - ethash_params const *params, - const uint8_t seed[32]) { - assert((params->cache_size % sizeof(node)) == 0); - uint32_t const num_nodes = (uint32_t)(params->cache_size / sizeof(node)); - - SHA3_512(nodes[0].bytes, seed, 32); - - for (unsigned i = 1; i != num_nodes; ++i) { - SHA3_512(nodes[i].bytes, nodes[i - 1].bytes, 64); - } - - for (unsigned j = 0; j != CACHE_ROUNDS; j++) { - for (unsigned i = 0; i != num_nodes; i++) { - uint32_t const idx = nodes[i].words[0] % num_nodes; - node data; - data = nodes[(num_nodes - 1 + i) % num_nodes]; - for (unsigned w = 0; w != NODE_WORDS; ++w) - { - data.words[w] ^= nodes[idx].words[w]; - } - SHA3_512(nodes[i].bytes, data.bytes, sizeof(data)); - } - } - - // now perform endian conversion -#if BYTE_ORDER != LITTLE_ENDIAN - for (unsigned w = 0; w != (num_nodes*NODE_WORDS); ++w) - { - nodes->words[w] = fix_endian32(nodes->words[w]); - } -#endif -} - -void ethash_mkcache( - ethash_cache *cache, - ethash_params const *params, - const uint8_t seed[32]) { - node *nodes = (node *) cache->mem; - ethash_compute_cache_nodes(nodes, params, seed); -} - -void ethash_calculate_dag_item( - node *const ret, - const unsigned node_index, - const struct ethash_params *params, - const struct ethash_cache *cache) { - - uint32_t num_parent_nodes = (uint32_t)(params->cache_size / sizeof(node)); - node const *cache_nodes = (node const *) cache->mem; - node const *init = &cache_nodes[node_index % num_parent_nodes]; - - memcpy(ret, init, sizeof(node)); - ret->words[0] ^= node_index; - SHA3_512(ret->bytes, ret->bytes, sizeof(node)); - -#if defined(_M_X64) && ENABLE_SSE - __m128i const fnv_prime = _mm_set1_epi32(FNV_PRIME); - __m128i xmm0 = ret->xmm[0]; - __m128i xmm1 = ret->xmm[1]; - __m128i xmm2 = ret->xmm[2]; - __m128i xmm3 = ret->xmm[3]; -#endif - - for (unsigned i = 0; i != DAG_PARENTS; ++i) - { - uint32_t parent_index = ((node_index ^ i)*FNV_PRIME ^ ret->words[i % NODE_WORDS]) % num_parent_nodes; - node const *parent = &cache_nodes[parent_index]; - - #if defined(_M_X64) && ENABLE_SSE - { - xmm0 = _mm_mullo_epi32(xmm0, fnv_prime); - xmm1 = _mm_mullo_epi32(xmm1, fnv_prime); - xmm2 = _mm_mullo_epi32(xmm2, fnv_prime); - xmm3 = _mm_mullo_epi32(xmm3, fnv_prime); - xmm0 = _mm_xor_si128(xmm0, parent->xmm[0]); - xmm1 = _mm_xor_si128(xmm1, parent->xmm[1]); - xmm2 = _mm_xor_si128(xmm2, parent->xmm[2]); - xmm3 = _mm_xor_si128(xmm3, parent->xmm[3]); - - // have to write to ret as values are used to compute index - ret->xmm[0] = xmm0; - ret->xmm[1] = xmm1; - ret->xmm[2] = xmm2; - ret->xmm[3] = xmm3; - } - #else - { - for (unsigned w = 0; w != NODE_WORDS; ++w) { - ret->words[w] = fnv_hash(ret->words[w], parent->words[w]); - } - } - #endif - } - - SHA3_512(ret->bytes, ret->bytes, sizeof(node)); -} - -void ethash_compute_full_data( - void *mem, - ethash_params const *params, - ethash_cache const *cache) { - assert((params->full_size % (sizeof(uint32_t) * MIX_WORDS)) == 0); - assert((params->full_size % sizeof(node)) == 0); - node *full_nodes = mem; - - // now compute full nodes - for (unsigned n = 0; n != (params->full_size / sizeof(node)); ++n) { - ethash_calculate_dag_item(&(full_nodes[n]), n, params, cache); - } -} - -static void ethash_hash( - ethash_return_value * ret, - node const *full_nodes, - ethash_cache const *cache, - ethash_params const *params, - const uint8_t header_hash[32], - const uint64_t nonce) { - - assert((params->full_size % MIX_WORDS) == 0); - - // pack hash and nonce together into first 40 bytes of s_mix - assert(sizeof(node)*8 == 512); - node s_mix[MIX_NODES + 1]; - memcpy(s_mix[0].bytes, header_hash, 32); - -#if BYTE_ORDER != LITTLE_ENDIAN - s_mix[0].double_words[4] = fix_endian64(nonce); -#else - s_mix[0].double_words[4] = nonce; -#endif - - // compute sha3-512 hash and replicate across mix - SHA3_512(s_mix->bytes, s_mix->bytes, 40); - -#if BYTE_ORDER != LITTLE_ENDIAN - for (unsigned w = 0; w != 16; ++w) { - s_mix[0].words[w] = fix_endian32(s_mix[0].words[w]); - } -#endif - - node* const mix = s_mix + 1; - for (unsigned w = 0; w != MIX_WORDS; ++w) { - mix->words[w] = s_mix[0].words[w % NODE_WORDS]; - } - - unsigned const - page_size = sizeof(uint32_t) * MIX_WORDS, - num_full_pages = (unsigned)(params->full_size / page_size); - - - for (unsigned i = 0; i != ACCESSES; ++i) - { - uint32_t const index = ((s_mix->words[0] ^ i)*FNV_PRIME ^ mix->words[i % MIX_WORDS]) % num_full_pages; - - for (unsigned n = 0; n != MIX_NODES; ++n) - { - const node * dag_node = &full_nodes[MIX_NODES * index + n]; - - if (!full_nodes) { - node tmp_node; - ethash_calculate_dag_item(&tmp_node, index * MIX_NODES + n, params, cache); - dag_node = &tmp_node; - } - - #if defined(_M_X64) && ENABLE_SSE - { - __m128i fnv_prime = _mm_set1_epi32(FNV_PRIME); - __m128i xmm0 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[0]); - __m128i xmm1 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[1]); - __m128i xmm2 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[2]); - __m128i xmm3 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[3]); - mix[n].xmm[0] = _mm_xor_si128(xmm0, dag_node->xmm[0]); - mix[n].xmm[1] = _mm_xor_si128(xmm1, dag_node->xmm[1]); - mix[n].xmm[2] = _mm_xor_si128(xmm2, dag_node->xmm[2]); - mix[n].xmm[3] = _mm_xor_si128(xmm3, dag_node->xmm[3]); - } - #else - { - for (unsigned w = 0; w != NODE_WORDS; ++w) { - mix[n].words[w] = fnv_hash(mix[n].words[w], dag_node->words[w]); - } - } - #endif - } - - } - - // compress mix - for (unsigned w = 0; w != MIX_WORDS; w += 4) - { - uint32_t reduction = mix->words[w+0]; - reduction = reduction*FNV_PRIME ^ mix->words[w+1]; - reduction = reduction*FNV_PRIME ^ mix->words[w+2]; - reduction = reduction*FNV_PRIME ^ mix->words[w+3]; - mix->words[w/4] = reduction; - } - -#if BYTE_ORDER != LITTLE_ENDIAN - for (unsigned w = 0; w != MIX_WORDS/4; ++w) { - mix->words[w] = fix_endian32(mix->words[w]); - } -#endif - - memcpy(ret->mix_hash, mix->bytes, 32); - // final Keccak hash - SHA3_256(ret->result, s_mix->bytes, 64+32); // Keccak-256(s + compressed_mix) -} - -void ethash_quick_hash( - uint8_t return_hash[32], - const uint8_t header_hash[32], - const uint64_t nonce, - const uint8_t mix_hash[32]) { - - uint8_t buf[64+32]; - memcpy(buf, header_hash, 32); -#if BYTE_ORDER != LITTLE_ENDIAN - nonce = fix_endian64(nonce); -#endif - memcpy(&(buf[32]), &nonce, 8); - SHA3_512(buf, buf, 40); - memcpy(&(buf[64]), mix_hash, 32); - SHA3_256(return_hash, buf, 64+32); -} - -int ethash_quick_check_difficulty( - const uint8_t header_hash[32], - const uint64_t nonce, - const uint8_t mix_hash[32], - const uint8_t difficulty[32]) { - uint8_t return_hash[32]; - ethash_quick_hash(return_hash, header_hash, nonce, mix_hash); - return ethash_check_difficulty(return_hash, difficulty); -} - -void ethash_full(ethash_return_value * ret, void const *full_mem, ethash_params const *params, const uint8_t previous_hash[32], const uint64_t nonce) { - ethash_hash(ret, (node const *) full_mem, NULL, params, previous_hash, nonce); -} - -void ethash_light(ethash_return_value * ret, ethash_cache const *cache, ethash_params const *params, const uint8_t previous_hash[32], const uint64_t nonce) { - ethash_hash(ret, NULL, cache, params, previous_hash, nonce); -} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h deleted file mode 100644 index bcbacdaa4..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h +++ /dev/null @@ -1,48 +0,0 @@ -#pragma once -#include "compiler.h" -#include "endian.h" -#include "ethash.h" - -#define ENABLE_SSE 1 - -#if defined(_M_X64) && ENABLE_SSE -#include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -// compile time settings -#define NODE_WORDS (64/4) -#define MIX_WORDS (MIX_BYTES/4) -#define MIX_NODES (MIX_WORDS / NODE_WORDS) -#include - -typedef union node { - uint8_t bytes[NODE_WORDS * 4]; - uint32_t words[NODE_WORDS]; - uint64_t double_words[NODE_WORDS / 2]; - -#if defined(_M_X64) && ENABLE_SSE - __m128i xmm[NODE_WORDS/4]; -#endif - -} node; - -void ethash_calculate_dag_item( - node *const ret, - const unsigned node_index, - ethash_params const *params, - ethash_cache const *cache -); - -void ethash_quick_hash( - uint8_t return_hash[32], - const uint8_t header_hash[32], - const uint64_t nonce, - const uint8_t mix_hash[32]); - -#ifdef __cplusplus -} -#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c deleted file mode 100644 index 0c28230b8..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c +++ /dev/null @@ -1,151 +0,0 @@ -/** libkeccak-tiny -* -* A single-file implementation of SHA-3 and SHAKE. -* -* Implementor: David Leon Gil -* License: CC0, attribution kindly requested. Blame taken too, -* but not liability. -*/ -#include "sha3.h" - -#include -#include -#include -#include - -/******** The Keccak-f[1600] permutation ********/ - -/*** Constants. ***/ -static const uint8_t rho[24] = \ - { 1, 3, 6, 10, 15, 21, - 28, 36, 45, 55, 2, 14, - 27, 41, 56, 8, 25, 43, - 62, 18, 39, 61, 20, 44}; -static const uint8_t pi[24] = \ - {10, 7, 11, 17, 18, 3, - 5, 16, 8, 21, 24, 4, - 15, 23, 19, 13, 12, 2, - 20, 14, 22, 9, 6, 1}; -static const uint64_t RC[24] = \ - {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, - 0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, - 0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL, - 0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, - 0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL, - 0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL}; - -/*** Helper macros to unroll the permutation. ***/ -#define rol(x, s) (((x) << s) | ((x) >> (64 - s))) -#define REPEAT6(e) e e e e e e -#define REPEAT24(e) REPEAT6(e e e e) -#define REPEAT5(e) e e e e e -#define FOR5(v, s, e) \ - v = 0; \ - REPEAT5(e; v += s;) - -/*** Keccak-f[1600] ***/ -static inline void keccakf(void* state) { - uint64_t* a = (uint64_t*)state; - uint64_t b[5] = {0}; - uint64_t t = 0; - uint8_t x, y; - - for (int i = 0; i < 24; i++) { - // Theta - FOR5(x, 1, - b[x] = 0; - FOR5(y, 5, - b[x] ^= a[x + y]; )) - FOR5(x, 1, - FOR5(y, 5, - a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); )) - // Rho and pi - t = a[1]; - x = 0; - REPEAT24(b[0] = a[pi[x]]; - a[pi[x]] = rol(t, rho[x]); - t = b[0]; - x++; ) - // Chi - FOR5(y, - 5, - FOR5(x, 1, - b[x] = a[y + x];) - FOR5(x, 1, - a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); )) - // Iota - a[0] ^= RC[i]; - } -} - -/******** The FIPS202-defined functions. ********/ - -/*** Some helper macros. ***/ - -#define _(S) do { S } while (0) -#define FOR(i, ST, L, S) \ - _(for (size_t i = 0; i < L; i += ST) { S; }) -#define mkapply_ds(NAME, S) \ - static inline void NAME(uint8_t* dst, \ - const uint8_t* src, \ - size_t len) { \ - FOR(i, 1, len, S); \ - } -#define mkapply_sd(NAME, S) \ - static inline void NAME(const uint8_t* src, \ - uint8_t* dst, \ - size_t len) { \ - FOR(i, 1, len, S); \ - } - -mkapply_ds(xorin, dst[i] ^= src[i]) // xorin -mkapply_sd(setout, dst[i] = src[i]) // setout - -#define P keccakf -#define Plen 200 - -// Fold P*F over the full blocks of an input. -#define foldP(I, L, F) \ - while (L >= rate) { \ - F(a, I, rate); \ - P(a); \ - I += rate; \ - L -= rate; \ - } - -/** The sponge-based hash construction. **/ -static inline int hash(uint8_t* out, size_t outlen, - const uint8_t* in, size_t inlen, - size_t rate, uint8_t delim) { - if ((out == NULL) || ((in == NULL) && inlen != 0) || (rate >= Plen)) { - return -1; - } - uint8_t a[Plen] = {0}; - // Absorb input. - foldP(in, inlen, xorin); - // Xor in the DS and pad frame. - a[inlen] ^= delim; - a[rate - 1] ^= 0x80; - // Xor in the last block. - xorin(a, in, inlen); - // Apply P - P(a); - // Squeeze output. - foldP(out, outlen, setout); - setout(a, out, outlen); - memset(a, 0, 200); - return 0; -} - -#define defsha3(bits) \ - int sha3_##bits(uint8_t* out, size_t outlen, \ - const uint8_t* in, size_t inlen) { \ - if (outlen > (bits/8)) { \ - return -1; \ - } \ - return hash(out, outlen, in, inlen, 200 - (bits / 4), 0x01); \ - } - -/*** FIPS202 SHA3 FOFs ***/ -defsha3(256) -defsha3(512) \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h deleted file mode 100644 index 36a0a5301..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -#include "compiler.h" -#include -#include - -#define decsha3(bits) \ - int sha3_##bits(uint8_t*, size_t, const uint8_t*, size_t); - -decsha3(256) -decsha3(512) - -static inline void SHA3_256(uint8_t * const ret, uint8_t const *data, const size_t size) { - sha3_256(ret, 32, data, size); -} - -static inline void SHA3_512(uint8_t * const ret, uint8_t const *data, const size_t size) { - sha3_512(ret, 64, data, size); -} - -#ifdef __cplusplus -} -#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp deleted file mode 100644 index 9454ce04a..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* - This file is part of cpp-ethereum. - - cpp-ethereum is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - cpp-ethereum is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with cpp-ethereum. If not, see . -*/ - -/** @file sha3.cpp -* @author Tim Hughes -* @date 2015 -*/ - -#include -#include - -extern "C" { -void SHA3_256(uint8_t *const ret, const uint8_t *data, size_t size) { - CryptoPP::SHA3_256().CalculateDigest(ret, data, size); -} - -void SHA3_512(uint8_t *const ret, const uint8_t *data, size_t size) { - CryptoPP::SHA3_512().CalculateDigest(ret, data, size); -} -} \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h deleted file mode 100644 index f910960e1..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include "compiler.h" -#include - -#ifdef __cplusplus -extern "C" { -#endif - -void SHA3_256(uint8_t *const ret, const uint8_t *data, size_t size); -void SHA3_512(uint8_t *const ret, const uint8_t *data, size_t size); - -#ifdef __cplusplus -} -#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c deleted file mode 100644 index fbf268b7d..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - This file is part of cpp-ethereum. - - cpp-ethereum is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - cpp-ethereum is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with cpp-ethereum. If not, see . -*/ -/** @file util.c - * @author Tim Hughes - * @date 2015 - */ -#include -#include -#include "util.h" - -#ifdef _MSC_VER - -// foward declare without all of Windows.h -__declspec(dllimport) void __stdcall OutputDebugStringA(const char* lpOutputString); - -void debugf(const char *str, ...) -{ - va_list args; - va_start(args, str); - - char buf[1<<16]; - _vsnprintf_s(buf, sizeof(buf), sizeof(buf), str, args); - buf[sizeof(buf)-1] = '\0'; - OutputDebugStringA(buf); -} - -#endif diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h deleted file mode 100644 index 2f59076f6..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - This file is part of cpp-ethereum. - - cpp-ethereum is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - cpp-ethereum is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with cpp-ethereum. If not, see . -*/ -/** @file util.h - * @author Tim Hughes - * @date 2015 - */ -#pragma once -#include -#include "compiler.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _MSC_VER -void debugf(const char *str, ...); -#else -#define debugf printf -#endif - -static inline uint32_t min_u32(uint32_t a, uint32_t b) -{ - return a < b ? a : b; -} - -static inline uint32_t clamp_u32(uint32_t x, uint32_t min_, uint32_t max_) -{ - return x < min_ ? min_ : (x > max_ ? max_ : x); -} - -#ifdef __cplusplus -} -#endif diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp deleted file mode 100644 index 642c33cb3..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp +++ /dev/null @@ -1,29 +0,0 @@ -{ - "targets": - [{ - "target_name": "ethash", - "sources": [ - './ethash.cc', - '../libethash/ethash.h', - '../libethash/util.c', - '../libethash/util.h', - '../libethash/blum_blum_shub.h', - '../libethash/blum_blum_shub.c', - '../libethash/sha3.h', - '../libethash/sha3.c', - '../libethash/internal.h', - '../libethash/internal.c' - ], - "include_dirs": [ - "../", - " -#include -#include -#include -#include -#include "../libethash/ethash.h" - -using namespace v8; - -class EthashValidator : public NanAsyncWorker { - public: - // Constructor - EthashValidator(NanCallback *callback, const unsigned blocknumber, const unsigned char * seed) - : NanAsyncWorker(callback), blocknumber(blocknumber), seed(seed) {} - // Destructor - ~EthashValidator() { - free(this->cache); - free(this->params); - } - - // Executed inside the worker-thread. - // It is not safe to access V8, or V8 data structures - // here, so everything we need for input and output - // should go on `this`. - void Execute () { - - /* this->result = secp256k1_ecdsa_sign(this->msg, this->sig , &this->sig_len, this->pk, NULL, NULL); */ - } - - // Executed when the async work is complete - // this function will be run inside the main event loop - // so it is safe to use V8 again - void HandleOKCallback () { - NanScope(); - Handle argv[] = { - NanNew(this->result) - }; - callback->Call(2, argv); - } - - protected: - const unsigned blocknumber; - const unsigned char * seed; - ethash_params * params; - ethash_cache * cache; - bool result; - bool ready = 0; -}; - -/* class CompactSignWorker : public SignWorker { */ -/* public: */ -/* CompactSignWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *pk ) */ -/* : SignWorker(callback, msg, pk){} */ - -/* void Execute () { */ -/* this->result = secp256k1_ecdsa_sign_compact(this->msg, this->sig , this->pk, NULL, NULL, &this->sig_len); */ -/* } */ - -/* void HandleOKCallback () { */ -/* NanScope(); */ -/* Handle argv[] = { */ -/* NanNew(this->result), */ -/* NanNewBufferHandle((char *)this->sig, 64), */ -/* NanNew(this->sig_len) */ -/* }; */ -/* callback->Call(3, argv); */ -/* } */ -/* }; */ - -/* class RecoverWorker : public NanAsyncWorker { */ -/* public: */ -/* // Constructor */ -/* RecoverWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *sig, int compressed, int rec_id) */ -/* : NanAsyncWorker(callback), msg(msg), sig(sig), compressed(compressed), rec_id(rec_id) {} */ -/* // Destructor */ -/* ~RecoverWorker() {} */ - -/* void Execute () { */ -/* if(this->compressed == 1){ */ -/* this->pubkey = new unsigned char[33]; */ -/* }else{ */ -/* this->pubkey = new unsigned char[65]; */ -/* } */ - -/* this->result = secp256k1_ecdsa_recover_compact(this->msg, this->sig, this->pubkey, &this->pubkey_len, this->compressed, this->rec_id); */ -/* } */ - -/* void HandleOKCallback () { */ -/* NanScope(); */ -/* Handle argv[] = { */ -/* NanNew(this->result), */ -/* NanNewBufferHandle((char *)this->pubkey, this->pubkey_len) */ -/* }; */ -/* callback->Call(2, argv); */ -/* } */ - -/* protected: */ -/* const unsigned char * msg; */ -/* const unsigned char * sig; */ -/* int compressed; */ -/* int rec_id; */ -/* int result; */ -/* unsigned char * pubkey; */ -/* int pubkey_len; */ -/* }; */ - -/* class VerifyWorker : public NanAsyncWorker { */ -/* public: */ -/* // Constructor */ -/* VerifyWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *sig, int sig_len, const unsigned char *pub_key, int pub_key_len) */ -/* : NanAsyncWorker(callback), msg(msg), sig(sig), sig_len(sig_len), pub_key(pub_key), pub_key_len(pub_key_len) {} */ -/* // Destructor */ -/* ~VerifyWorker() {} */ - -/* void Execute () { */ -/* this->result = secp256k1_ecdsa_verify(this->msg, this->sig, this->sig_len, this->pub_key, this->pub_key_len); */ -/* } */ - -/* void HandleOKCallback () { */ -/* NanScope(); */ -/* Handle argv[] = { */ -/* NanNew(this->result), */ -/* }; */ -/* callback->Call(1, argv); */ -/* } */ - -/* protected: */ -/* int result; */ -/* const unsigned char * msg; */ -/* const unsigned char * sig; */ -/* int sig_len; */ -/* const unsigned char * pub_key; */ -/* int pub_key_len; */ -/* }; */ - -/* NAN_METHOD(Verify){ */ -/* NanScope(); */ - -/* Local pub_buf = args[0].As(); */ -/* const unsigned char *pub_data = (unsigned char *) node::Buffer::Data(pub_buf); */ -/* int pub_len = node::Buffer::Length(args[0]); */ - -/* Local msg_buf = args[1].As(); */ -/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */ - -/* Local sig_buf = args[2].As(); */ -/* const unsigned char *sig_data = (unsigned char *) node::Buffer::Data(sig_buf); */ -/* int sig_len = node::Buffer::Length(args[2]); */ - -/* int result = secp256k1_ecdsa_verify(msg_data, sig_data, sig_len, pub_data, pub_len ); */ - -/* NanReturnValue(NanNew(result)); */ -/* } */ - -/* NAN_METHOD(Verify_Async){ */ -/* NanScope(); */ - -/* Local pub_buf = args[0].As(); */ -/* const unsigned char *pub_data = (unsigned char *) node::Buffer::Data(pub_buf); */ -/* int pub_len = node::Buffer::Length(args[0]); */ - -/* Local msg_buf = args[1].As(); */ -/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */ - -/* Local sig_buf = args[2].As(); */ -/* const unsigned char *sig_data = (unsigned char *) node::Buffer::Data(sig_buf); */ -/* int sig_len = node::Buffer::Length(args[2]); */ - -/* Local callback = args[3].As(); */ -/* NanCallback* nanCallback = new NanCallback(callback); */ - -/* VerifyWorker* worker = new VerifyWorker(nanCallback, msg_data, sig_data, sig_len, pub_data, pub_len); */ -/* NanAsyncQueueWorker(worker); */ - -/* NanReturnUndefined(); */ -/* } */ - -/* NAN_METHOD(Sign){ */ -/* NanScope(); */ - -/* //the first argument should be the private key as a buffer */ -/* Local pk_buf = args[0].As(); */ -/* const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */ -/* int sec_len = node::Buffer::Length(args[0]); */ -/* //the second argument is the message that we are signing */ -/* Local msg_buf = args[1].As(); */ -/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */ - -/* unsigned char sig[72]; */ -/* int sig_len = 72; */ -/* int msg_len = node::Buffer::Length(args[1]); */ - -/* if(sec_len != 32){ */ -/* return NanThrowError("the secret key needs tobe 32 bytes"); */ -/* } */ - -/* if(msg_len == 0){ */ -/* return NanThrowError("messgae cannot be null"); */ -/* } */ - -/* int result = secp256k1_ecdsa_sign(msg_data, sig , &sig_len, pk_data, NULL, NULL); */ - -/* if(result == 1){ */ -/* NanReturnValue(NanNewBufferHandle((char *)sig, sig_len)); */ -/* }else{ */ -/* return NanThrowError("nonce invalid, try another one"); */ -/* } */ -/* } */ - -/* NAN_METHOD(Sign_Async){ */ - -/* NanScope(); */ -/* //the first argument should be the private key as a buffer */ -/* Local sec_buf = args[0].As(); */ -/* const unsigned char *sec_data = (unsigned char *) node::Buffer::Data(sec_buf); */ -/* int sec_len = node::Buffer::Length(args[0]); */ -/* //the second argument is the message that we are signing */ -/* Local msg_buf = args[1].As(); */ -/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */ - -/* Local callback = args[2].As(); */ -/* NanCallback* nanCallback = new NanCallback(callback); */ - -/* int msg_len = node::Buffer::Length(args[1]); */ - -/* if(sec_len != 32){ */ -/* return NanThrowError("the secret key needs tobe 32 bytes"); */ -/* } */ - -/* if(msg_len == 0){ */ -/* return NanThrowError("messgae cannot be null"); */ -/* } */ - -/* SignWorker* worker = new SignWorker(nanCallback, msg_data, sec_data); */ -/* NanAsyncQueueWorker(worker); */ - -/* NanReturnUndefined(); */ -/* } */ - -/* NAN_METHOD(Sign_Compact){ */ - -/* NanScope(); */ - -/* Local seckey_buf = args[0].As(); */ -/* const unsigned char *seckey_data = (unsigned char *) node::Buffer::Data(seckey_buf); */ -/* int sec_len = node::Buffer::Length(args[0]); */ - -/* Local msg_buf = args[1].As(); */ -/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */ -/* int msg_len = node::Buffer::Length(args[1]); */ - -/* if(sec_len != 32){ */ -/* return NanThrowError("the secret key needs tobe 32 bytes"); */ -/* } */ - -/* if(msg_len == 0){ */ -/* return NanThrowError("messgae cannot be null"); */ -/* } */ - -/* unsigned char sig[64]; */ -/* int rec_id; */ - -/* //TODO: change the nonce */ -/* int valid_nonce = secp256k1_ecdsa_sign_compact(msg_data, sig, seckey_data, NULL, NULL, &rec_id ); */ - -/* Local array = NanNew(3); */ -/* array->Set(0, NanNew(valid_nonce)); */ -/* array->Set(1, NanNew(rec_id)); */ -/* array->Set(2, NanNewBufferHandle((char *)sig, 64)); */ - -/* NanReturnValue(array); */ -/* } */ - -/* NAN_METHOD(Sign_Compact_Async){ */ -/* NanScope(); */ -/* //the first argument should be the private key as a buffer */ -/* Local sec_buf = args[0].As(); */ -/* const unsigned char *sec_data = (unsigned char *) node::Buffer::Data(sec_buf); */ -/* int sec_len = node::Buffer::Length(args[0]); */ - -/* //the second argument is the message that we are signing */ -/* Local msg_buf = args[1].As(); */ -/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */ - - -/* Local callback = args[2].As(); */ -/* NanCallback* nanCallback = new NanCallback(callback); */ - -/* int msg_len = node::Buffer::Length(args[1]); */ - -/* if(sec_len != 32){ */ -/* return NanThrowError("the secret key needs tobe 32 bytes"); */ -/* } */ - -/* if(msg_len == 0){ */ -/* return NanThrowError("messgae cannot be null"); */ -/* } */ - -/* CompactSignWorker* worker = new CompactSignWorker(nanCallback, msg_data, sec_data); */ -/* NanAsyncQueueWorker(worker); */ - -/* NanReturnUndefined(); */ -/* } */ - -/* NAN_METHOD(Recover_Compact){ */ - -/* NanScope(); */ - -/* Local msg_buf = args[0].As(); */ -/* const unsigned char *msg = (unsigned char *) node::Buffer::Data(msg_buf); */ -/* int msg_len = node::Buffer::Length(args[0]); */ - -/* Local sig_buf = args[1].As(); */ -/* const unsigned char *sig = (unsigned char *) node::Buffer::Data(sig_buf); */ - -/* Local compressed = args[2].As(); */ -/* int int_compressed = compressed->IntegerValue(); */ - -/* Local rec_id = args[3].As(); */ -/* int int_rec_id = rec_id->IntegerValue(); */ - -/* if(msg_len == 0){ */ -/* return NanThrowError("messgae cannot be null"); */ -/* } */ - -/* unsigned char pubKey[65]; */ - -/* int pubKeyLen; */ - -/* int result = secp256k1_ecdsa_recover_compact(msg, sig, pubKey, &pubKeyLen, int_compressed, int_rec_id); */ -/* if(result == 1){ */ -/* NanReturnValue(NanNewBufferHandle((char *)pubKey, pubKeyLen)); */ -/* }else{ */ - -/* NanReturnValue(NanFalse()); */ -/* } */ -/* } */ - -/* NAN_METHOD(Recover_Compact_Async){ */ - -/* NanScope(); */ - -/* //the message */ -/* Local msg_buf = args[0].As(); */ -/* const unsigned char *msg = (unsigned char *) node::Buffer::Data(msg_buf); */ -/* int msg_len = node::Buffer::Length(args[0]); */ - -/* //the signature length */ -/* Local sig_buf = args[1].As(); */ -/* const unsigned char *sig = (unsigned char *) node::Buffer::Data(sig_buf); */ -/* //todo sig len needs tobe 64 */ -/* int sig_len = node::Buffer::Length(args[1]); */ - -/* //to compress or not? */ -/* Local compressed = args[2].As(); */ -/* int int_compressed = compressed->IntegerValue(); */ - -/* //the rec_id */ -/* Local rec_id = args[3].As(); */ -/* int int_rec_id = rec_id->IntegerValue(); */ - -/* //the callback */ -/* Local callback = args[4].As(); */ -/* NanCallback* nanCallback = new NanCallback(callback); */ - -/* if(sig_len != 64){ */ -/* return NanThrowError("the signature needs to be 64 bytes"); */ -/* } */ - -/* if(msg_len == 0){ */ -/* return NanThrowError("messgae cannot be null"); */ -/* } */ - -/* RecoverWorker* worker = new RecoverWorker(nanCallback, msg, sig, int_compressed, int_rec_id); */ -/* NanAsyncQueueWorker(worker); */ - -/* NanReturnUndefined(); */ -/* } */ - -/* NAN_METHOD(Seckey_Verify){ */ -/* NanScope(); */ - -/* const unsigned char *data = (const unsigned char*) node::Buffer::Data(args[0]); */ -/* int result = secp256k1_ec_seckey_verify(data); */ -/* NanReturnValue(NanNew(result)); */ -/* } */ - -/* NAN_METHOD(Pubkey_Verify){ */ - -/* NanScope(); */ - -/* Local pub_buf = args[0].As(); */ -/* const unsigned char *pub_key = (unsigned char *) node::Buffer::Data(pub_buf); */ -/* int pub_key_len = node::Buffer::Length(args[0]); */ - -/* int result = secp256k1_ec_pubkey_verify(pub_key, pub_key_len); */ - -/* NanReturnValue(NanNew(result)); */ -/* } */ - -/* NAN_METHOD(Pubkey_Create){ */ -/* NanScope(); */ - -/* Handle pk_buf = args[0].As(); */ -/* const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */ -/* int pk_len = node::Buffer::Length(args[0]); */ - -/* Local l_compact = args[1].As(); */ -/* int compact = l_compact->IntegerValue(); */ -/* int pubKeyLen; */ - -/* if(pk_len != 32){ */ -/* return NanThrowError("the secert key need to be 32 bytes"); */ -/* } */ - -/* unsigned char *pubKey; */ -/* if(compact == 1){ */ -/* pubKey = new unsigned char[33]; */ -/* }else{ */ -/* pubKey = new unsigned char[65]; */ -/* } */ - -/* int results = secp256k1_ec_pubkey_create(pubKey,&pubKeyLen, pk_data, compact ); */ -/* if(results == 0){ */ -/* return NanThrowError("secret was invalid, try again."); */ -/* }else{ */ -/* NanReturnValue(NanNewBufferHandle((char *)pubKey, pubKeyLen)); */ -/* } */ -/* } */ - -/* NAN_METHOD(Pubkey_Decompress){ */ -/* NanScope(); */ - -/* //the first argument should be the private key as a buffer */ -/* Local pk_buf = args[0].As(); */ -/* unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */ - -/* int pk_len = node::Buffer::Length(args[0]); */ - -/* int results = secp256k1_ec_pubkey_decompress(pk_data, &pk_len); */ - -/* if(results == 0){ */ -/* return NanThrowError("invalid public key"); */ -/* }else{ */ -/* NanReturnValue(NanNewBufferHandle((char *)pk_data, pk_len)); */ -/* } */ -/* } */ - - -/* NAN_METHOD(Privkey_Import){ */ -/* NanScope(); */ - -/* //the first argument should be the private key as a buffer */ -/* Handle pk_buf = args[0].As(); */ -/* const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */ - -/* int pk_len = node::Buffer::Length(args[0]); */ - -/* unsigned char sec_key[32]; */ -/* int results = secp256k1_ec_privkey_import(sec_key, pk_data, pk_len); */ - -/* if(results == 0){ */ -/* return NanThrowError("invalid private key"); */ -/* }else{ */ -/* NanReturnValue(NanNewBufferHandle((char *)sec_key, 32)); */ -/* } */ -/* } */ - -/* NAN_METHOD(Privkey_Export){ */ -/* NanScope(); */ - -/* //the first argument should be the private key as a buffer */ -/* Handle sk_buf = args[0].As(); */ -/* const unsigned char *sk_data = (unsigned char *) node::Buffer::Data(sk_buf); */ - -/* Local l_compressed = args[1].As(); */ -/* int compressed = l_compressed->IntegerValue(); */ - -/* unsigned char *privKey; */ -/* int pk_len; */ -/* int results = secp256k1_ec_privkey_export(sk_data, privKey, &pk_len, compressed); */ -/* if(results == 0){ */ -/* return NanThrowError("invalid private key"); */ -/* }else{ */ -/* NanReturnValue(NanNewBufferHandle((char *)privKey, pk_len)); */ -/* } */ -/* } */ - -/* NAN_METHOD(Privkey_Tweak_Add){ */ -/* NanScope(); */ - -/* //the first argument should be the private key as a buffer */ -/* Handle sk_buf = args[0].As(); */ -/* unsigned char *sk = (unsigned char *) node::Buffer::Data(sk_buf); */ - -/* Handle tweak_buf = args[1].As(); */ -/* const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */ - -/* int results = secp256k1_ec_privkey_tweak_add(sk, tweak); */ -/* if(results == 0){ */ -/* return NanThrowError("invalid key"); */ -/* }else{ */ -/* NanReturnValue(NanNewBufferHandle((char *)sk, 32)); */ -/* } */ -/* } */ - -/* NAN_METHOD(Privkey_Tweak_Mul){ */ -/* NanScope(); */ - -/* //the first argument should be the private key as a buffer */ -/* Handle sk_buf = args[0].As(); */ -/* unsigned char *sk = (unsigned char *) node::Buffer::Data(sk_buf); */ - -/* Handle tweak_buf = args[1].As(); */ -/* const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */ - -/* int results = secp256k1_ec_privkey_tweak_mul(sk, tweak); */ -/* if(results == 0){ */ -/* return NanThrowError("invalid key"); */ -/* }else{ */ -/* NanReturnValue(NanNewBufferHandle((char *)sk, 32)); */ -/* } */ -/* } */ - -/* NAN_METHOD(Pubkey_Tweak_Add){ */ -/* NanScope(); */ - -/* //the first argument should be the private key as a buffer */ -/* Handle pk_buf = args[0].As(); */ -/* unsigned char *pk = (unsigned char *) node::Buffer::Data(pk_buf); */ -/* int pk_len = node::Buffer::Length(args[0]); */ - -/* Handle tweak_buf = args[1].As(); */ -/* const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */ - -/* int results = secp256k1_ec_pubkey_tweak_add(pk, pk_len, tweak); */ -/* if(results == 0){ */ -/* return NanThrowError("invalid key"); */ -/* }else{ */ -/* NanReturnValue(NanNewBufferHandle((char *)pk, pk_len)); */ -/* } */ -/* } */ - -/* NAN_METHOD(Pubkey_Tweak_Mul){ */ -/* NanScope(); */ - -/* //the first argument should be the private key as a buffer */ -/* Handle pk_buf = args[0].As(); */ -/* unsigned char *pk = (unsigned char *) node::Buffer::Data(pk_buf); */ -/* int pk_len = node::Buffer::Length(args[0]); */ - -/* Handle tweak_buf = args[1].As(); */ -/* const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */ - -/* int results = secp256k1_ec_pubkey_tweak_mul(pk, pk_len, tweak); */ -/* if(results == 0){ */ -/* return NanThrowError("invalid key"); */ -/* }else{ */ -/* NanReturnValue(NanNewBufferHandle((char *)pk, pk_len)); */ -/* } */ -/* } */ - -void Init(Handle exports) { - - /* secp256k1_start(SECP256K1_START_SIGN | SECP256K1_START_VERIFY); */ - /* exports->Set(NanNew("seckeyVerify"), NanNew(Seckey_Verify)->GetFunction()); */ - /* exports->Set(NanNew("sign"), NanNew(Sign)->GetFunction()); */ - /* exports->Set(NanNew("signAsync"), NanNew(Sign_Async)->GetFunction()); */ - /* exports->Set(NanNew("signCompact"), NanNew(Sign_Compact)->GetFunction()); */ - /* exports->Set(NanNew("signCompactAsync"), NanNew(Sign_Compact_Async)->GetFunction()); */ - /* exports->Set(NanNew("recoverCompact"), NanNew(Recover_Compact)->GetFunction()); */ - /* exports->Set(NanNew("recoverCompactAsync"), NanNew(Recover_Compact_Async)->GetFunction()); */ - /* exports->Set(NanNew("verify"), NanNew(Verify)->GetFunction()); */ - /* exports->Set(NanNew("verifyAsync"), NanNew(Verify_Async)->GetFunction()); */ - /* exports->Set(NanNew("secKeyVerify"), NanNew(Seckey_Verify)->GetFunction()); */ - /* exports->Set(NanNew("pubKeyVerify"), NanNew(Pubkey_Verify)->GetFunction()); */ - /* exports->Set(NanNew("pubKeyCreate"), NanNew(Pubkey_Create)->GetFunction()); */ - /* exports->Set(NanNew("pubKeyDecompress"), NanNew(Pubkey_Decompress)->GetFunction()); */ - /* exports->Set(NanNew("privKeyExport"), NanNew(Privkey_Export)->GetFunction()); */ - /* exports->Set(NanNew("privKeyImport"), NanNew(Privkey_Import)->GetFunction()); */ - /* exports->Set(NanNew("privKeyTweakAdd"), NanNew(Privkey_Tweak_Add)->GetFunction()); */ - /* exports->Set(NanNew("privKeyTweakMul"), NanNew(Privkey_Tweak_Mul)->GetFunction()); */ - /* exports->Set(NanNew("pubKeyTweakAdd"), NanNew(Privkey_Tweak_Add)->GetFunction()); */ - /* exports->Set(NanNew("pubKeyTweakMul"), NanNew(Privkey_Tweak_Mul)->GetFunction()); */ -} - -NODE_MODULE(secp256k1, Init) diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json deleted file mode 100644 index 690ea3263..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "name": "node-ethash", - "version": "1.0.0", - "description": "", - "main": "index.js", - "scripts": { - "test": "echo \"Error: no test specified\" && exit 1", - "install": "node-gyp rebuild" - }, - "author": "", - "license": "ISC", - "gypfile": true -} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md deleted file mode 100644 index bb46cdd6e..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md +++ /dev/null @@ -1,12 +0,0 @@ -# To Develop -`npm install -g node-gyp` -`npm install .` - - -# To rebuild -`node-gyp rebuild` - - -# notes - -nan is good https://github.com/rvagg/nan diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/pyethash/.gitignore b/Godeps/_workspace/src/github.com/ethereum/ethash/pyethash/.gitignore new file mode 100644 index 000000000..e1374b866 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/pyethash/.gitignore @@ -0,0 +1,2 @@ +pyethash.egg-info/ +*.so diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/pyethash/__init__.py b/Godeps/_workspace/src/github.com/ethereum/ethash/pyethash/__init__.py new file mode 100644 index 000000000..fca037dba --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/pyethash/__init__.py @@ -0,0 +1,3 @@ +import pyethash.core +core = pyethash.core +EPOCH_LENGTH = 30000 diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/setup.py b/Godeps/_workspace/src/github.com/ethereum/ethash/setup.py new file mode 100644 index 000000000..4e27aad6c --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/setup.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +from distutils.core import setup, Extension + +pyethash_core = Extension('pyethash.core', + sources = [ + 'src/python/core.c', + 'src/libethash/util.c', + 'src/libethash/internal.c', + 'src/libethash/sha3.c' + ], + extra_compile_args = ["-std=gnu99"]) + +setup ( + name = 'pyethash', + author = "Matthew Wampler-Doty", + author_email = "matthew.wampler.doty@gmail.com", + license = 'GPL', + version = '1.0', + description = 'Python wrappers for ethash, the ethereum proof of work hashing function', + ext_modules = [pyethash_core], + ) diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/benchmark/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/src/benchmark/CMakeLists.txt new file mode 100644 index 000000000..e6ba85790 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/benchmark/CMakeLists.txt @@ -0,0 +1,53 @@ +include_directories(..) + +set(CMAKE_BUILD_TYPE Release) + +if (MSVC) + add_definitions("/openmp") +endif() + +if (NOT MPI_FOUND) + find_package(MPI) +endif() + +if (NOT CRYPTOPP_FOUND) + find_package(CryptoPP 5.6.2) +endif() + +if (CRYPTOPP_FOUND) + add_definitions(-DWITH_CRYPTOPP) +endif() + +if (NOT OpenCL_FOUND) + find_package(OpenCL) +endif() +if (OpenCL_FOUND) + add_definitions(-DWITH_OPENCL) + include_directories(${OpenCL_INCLUDE_DIRS}) + list(APPEND FILES ethash_cl_miner.cpp ethash_cl_miner.h) +endif() + +if (MPI_FOUND) + include_directories(${MPI_INCLUDE_PATH}) + add_executable (Benchmark_MPI_FULL benchmark.cpp) + target_link_libraries (Benchmark_MPI_FULL ${ETHHASH_LIBS} ${MPI_LIBRARIES}) + SET_TARGET_PROPERTIES(Benchmark_MPI_FULL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} ${MPI_COMPILE_FLAGS} -DFULL -DMPI") + + add_executable (Benchmark_MPI_LIGHT benchmark.cpp) + target_link_libraries (Benchmark_MPI_LIGHT ${ETHHASH_LIBS} ${MPI_LIBRARIES}) + SET_TARGET_PROPERTIES(Benchmark_MPI_LIGHT PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} ${MPI_COMPILE_FLAGS} -DMPI") +endif() + +add_executable (Benchmark_FULL benchmark.cpp) +target_link_libraries (Benchmark_FULL ${ETHHASH_LIBS}) +SET_TARGET_PROPERTIES(Benchmark_FULL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -DFULL") + +add_executable (Benchmark_LIGHT benchmark.cpp) +target_link_libraries (Benchmark_LIGHT ${ETHHASH_LIBS}) + +if (OpenCL_FOUND) + add_executable (Benchmark_CL benchmark.cpp) + target_link_libraries (Benchmark_CL ${ETHHASH_LIBS} ethash-cl) + SET_TARGET_PROPERTIES(Benchmark_CL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -DOPENCL") +endif() + diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/benchmark/benchmark.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/src/benchmark/benchmark.cpp new file mode 100644 index 000000000..4c8f700c5 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/benchmark/benchmark.cpp @@ -0,0 +1,260 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file benchmark.cpp + * @author Tim Hughes + * @date 2015 + */ + +#include +#include +#include +#include +#include +#ifdef OPENCL +#include +#endif +#include +#include + +#ifdef WITH_CRYPTOPP +#include +#include + +#else +#include "libethash/sha3.h" +#endif // WITH_CRYPTOPP + +#undef min +#undef max + +#if defined(OPENCL) +const unsigned trials = 1024*1024*32; +#elif defined(FULL) +const unsigned trials = 1024*1024/8; +#else +const unsigned trials = 1024*1024/1024; +#endif +uint8_t g_hashes[1024*32]; + +static char nibbleToChar(unsigned nibble) +{ + return (char) ((nibble >= 10 ? 'a'-10 : '0') + nibble); +} + +static uint8_t charToNibble(char chr) +{ + if (chr >= '0' && chr <= '9') + { + return (uint8_t) (chr - '0'); + } + if (chr >= 'a' && chr <= 'z') + { + return (uint8_t) (chr - 'a' + 10); + } + if (chr >= 'A' && chr <= 'Z') + { + return (uint8_t) (chr - 'A' + 10); + } + return 0; +} + +static std::vector hexStringToBytes(char const* str) +{ + std::vector bytes(strlen(str) >> 1); + for (unsigned i = 0; i != bytes.size(); ++i) + { + bytes[i] = charToNibble(str[i*2 | 0]) << 4; + bytes[i] |= charToNibble(str[i*2 | 1]); + } + return bytes; +} + +static std::string bytesToHexString(uint8_t const* bytes, unsigned size) +{ + std::string str; + for (unsigned i = 0; i != size; ++i) + { + str += nibbleToChar(bytes[i] >> 4); + str += nibbleToChar(bytes[i] & 0xf); + } + return str; +} + +extern "C" int main(void) +{ + // params for ethash + ethash_params params; + ethash_params_init(¶ms, 0); + //params.full_size = 262147 * 4096; // 1GBish; + //params.full_size = 32771 * 4096; // 128MBish; + //params.full_size = 8209 * 4096; // 8MBish; + //params.cache_size = 8209*4096; + //params.cache_size = 2053*4096; + uint8_t seed[32], previous_hash[32]; + + memcpy(seed, hexStringToBytes("9410b944535a83d9adf6bbdcc80e051f30676173c16ca0d32d6f1263fc246466").data(), 32); + memcpy(previous_hash, hexStringToBytes("c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470").data(), 32); + + // allocate page aligned buffer for dataset +#ifdef FULL + void* full_mem_buf = malloc(params.full_size + 4095); + void* full_mem = (void*)((uintptr_t(full_mem_buf) + 4095) & ~4095); +#endif + void* cache_mem_buf = malloc(params.cache_size + 63); + void* cache_mem = (void*)((uintptr_t(cache_mem_buf) + 63) & ~63); + + ethash_cache cache; + cache.mem = cache_mem; + + // compute cache or full data + { + clock_t startTime = clock(); + ethash_mkcache(&cache, ¶ms, seed); + clock_t time = clock() - startTime; + + uint8_t cache_hash[32]; + SHA3_256(cache_hash, (uint8_t const*)cache_mem, params.cache_size); + debugf("ethash_mkcache: %ums, sha3: %s\n", (unsigned)((time*1000)/CLOCKS_PER_SEC), bytesToHexString(cache_hash,sizeof(cache_hash)).data()); + + // print a couple of test hashes + { + const clock_t startTime = clock(); + ethash_return_value hash; + ethash_light(&hash, &cache, ¶ms, previous_hash, 0); + const clock_t time = clock() - startTime; + debugf("ethash_light test: %ums, %s\n", (unsigned)((time*1000)/CLOCKS_PER_SEC), bytesToHexString(hash.result, 32).data()); + } + + #ifdef FULL + startTime = clock(); + ethash_compute_full_data(full_mem, ¶ms, &cache); + time = clock() - startTime; + debugf("ethash_compute_full_data: %ums\n", (unsigned)((time*1000)/CLOCKS_PER_SEC)); + #endif // FULL + } + +#ifdef OPENCL + ethash_cl_miner miner; + { + const clock_t startTime = clock(); + if (!miner.init(params, seed)) + exit(-1); + const clock_t time = clock() - startTime; + debugf("ethash_cl_miner init: %ums\n", (unsigned)((time*1000)/CLOCKS_PER_SEC)); + } +#endif + + +#ifdef FULL + { + const clock_t startTime = clock(); + ethash_return_value hash; + ethash_full(&hash, full_mem, ¶ms, previous_hash, 0); + const clock_t time = clock() - startTime; + debugf("ethash_full test: %uns, %s\n", (unsigned)((time*1000000)/CLOCKS_PER_SEC), bytesToHexString(hash.result, 32).data()); + } +#endif + +#ifdef OPENCL + // validate 1024 hashes against CPU + miner.hash(g_hashes, previous_hash, 0, 1024); + for (unsigned i = 0; i != 1024; ++i) + { + ethash_return_value hash; + ethash_light(&hash, &cache, ¶ms, previous_hash, i); + if (memcmp(hash.result, g_hashes + 32*i, 32) != 0) + { + debugf("nonce %u failed: %s %s\n", i, bytesToHexString(g_hashes + 32*i, 32).c_str(), bytesToHexString(hash.result, 32).c_str()); + static unsigned c = 0; + if (++c == 16) + { + exit(-1); + } + } + } +#endif + + + clock_t startTime = clock(); + unsigned hash_count = trials; + + #ifdef OPENCL + { + struct search_hook : ethash_cl_miner::search_hook + { + unsigned hash_count; + std::vector nonce_vec; + + virtual bool found(uint64_t const* nonces, uint32_t count) + { + nonce_vec.assign(nonces, nonces + count); + return false; + } + + virtual bool searched(uint64_t start_nonce, uint32_t count) + { + // do nothing + hash_count += count; + return hash_count >= trials; + } + }; + search_hook hook; + hook.hash_count = 0; + + miner.search(previous_hash, 0x000000ffffffffff, hook); + + for (unsigned i = 0; i != hook.nonce_vec.size(); ++i) + { + uint64_t nonce = hook.nonce_vec[i]; + ethash_return_value hash; + ethash_light(&hash, &cache, ¶ms, previous_hash, nonce); + debugf("found: %.8x%.8x -> %s\n", unsigned(nonce>>32), unsigned(nonce), bytesToHexString(hash.result, 32).c_str()); + } + + hash_count = hook.hash_count; + } + #else + { + //#pragma omp parallel for + for (int nonce = 0; nonce < trials; ++nonce) + { + ethash_return_value hash; + #ifdef FULL + ethash_full(&hash, full_mem, ¶ms, previous_hash, nonce); + #else + ethash_light(&hash, &cache, ¶ms, previous_hash, nonce); + #endif // FULL + } + } + #endif + + clock_t time = std::max((clock_t)1u, clock() - startTime); + + unsigned read_size = ACCESSES * MIX_BYTES; + debugf( + "hashrate: %8u, bw: %6u MB/s\n", + (unsigned)(((uint64_t)hash_count*CLOCKS_PER_SEC)/time), + (unsigned)((((uint64_t)hash_count*read_size*CLOCKS_PER_SEC)/time) / (1024*1024)) + ); + + free(cache_mem_buf); +#ifdef FULL + free(full_mem_buf); +#endif + + return 0; +} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/CMakeLists.txt new file mode 100644 index 000000000..fe563aae4 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/CMakeLists.txt @@ -0,0 +1,15 @@ +set(LIBRARY ethash-cl) +set(CMAKE_BUILD_TYPE Release) + +include(bin2h.cmake) +bin2h(SOURCE_FILE ethash_cl_miner_kernel.cl VARIABLE_NAME ethash_cl_miner_kernel HEADER_FILE ${CMAKE_CURRENT_BINARY_DIR}/ethash_cl_miner_kernel.h) + +if (NOT OpenCL_FOUND) + find_package(OpenCL) +endif() +if (OpenCL_FOUND) + include_directories(${OpenCL_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}) + include_directories(..) + add_library(${LIBRARY} ethash_cl_miner.cpp ethash_cl_miner.h) + TARGET_LINK_LIBRARIES(${LIBRARY} ${OpenCL_LIBRARIES} ethash) +endif() diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/bin2h.cmake b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/bin2h.cmake new file mode 100644 index 000000000..e224c1a67 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/bin2h.cmake @@ -0,0 +1,87 @@ +# https://gist.github.com/sivachandran/3a0de157dccef822a230 +include(CMakeParseArguments) + +# Function to wrap a given string into multiple lines at the given column position. +# Parameters: +# VARIABLE - The name of the CMake variable holding the string. +# AT_COLUMN - The column position at which string will be wrapped. +function(WRAP_STRING) + set(oneValueArgs VARIABLE AT_COLUMN) + cmake_parse_arguments(WRAP_STRING "${options}" "${oneValueArgs}" "" ${ARGN}) + + string(LENGTH ${${WRAP_STRING_VARIABLE}} stringLength) + math(EXPR offset "0") + + while(stringLength GREATER 0) + + if(stringLength GREATER ${WRAP_STRING_AT_COLUMN}) + math(EXPR length "${WRAP_STRING_AT_COLUMN}") + else() + math(EXPR length "${stringLength}") + endif() + + string(SUBSTRING ${${WRAP_STRING_VARIABLE}} ${offset} ${length} line) + set(lines "${lines}\n${line}") + + math(EXPR stringLength "${stringLength} - ${length}") + math(EXPR offset "${offset} + ${length}") + endwhile() + + set(${WRAP_STRING_VARIABLE} "${lines}" PARENT_SCOPE) +endfunction() + +# Function to embed contents of a file as byte array in C/C++ header file(.h). The header file +# will contain a byte array and integer variable holding the size of the array. +# Parameters +# SOURCE_FILE - The path of source file whose contents will be embedded in the header file. +# VARIABLE_NAME - The name of the variable for the byte array. The string "_SIZE" will be append +# to this name and will be used a variable name for size variable. +# HEADER_FILE - The path of header file. +# APPEND - If specified appends to the header file instead of overwriting it +# NULL_TERMINATE - If specified a null byte(zero) will be append to the byte array. This will be +# useful if the source file is a text file and we want to use the file contents +# as string. But the size variable holds size of the byte array without this +# null byte. +# Usage: +# bin2h(SOURCE_FILE "Logo.png" HEADER_FILE "Logo.h" VARIABLE_NAME "LOGO_PNG") +function(BIN2H) + set(options APPEND NULL_TERMINATE) + set(oneValueArgs SOURCE_FILE VARIABLE_NAME HEADER_FILE) + cmake_parse_arguments(BIN2H "${options}" "${oneValueArgs}" "" ${ARGN}) + + # reads source file contents as hex string + file(READ ${BIN2H_SOURCE_FILE} hexString HEX) + string(LENGTH ${hexString} hexStringLength) + + # appends null byte if asked + if(BIN2H_NULL_TERMINATE) + set(hexString "${hexString}00") + endif() + + # wraps the hex string into multiple lines at column 32(i.e. 16 bytes per line) + wrap_string(VARIABLE hexString AT_COLUMN 32) + math(EXPR arraySize "${hexStringLength} / 2") + + # adds '0x' prefix and comma suffix before and after every byte respectively + string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1, " arrayValues ${hexString}) + # removes trailing comma + string(REGEX REPLACE ", $" "" arrayValues ${arrayValues}) + + # converts the variable name into proper C identifier + # TODO: fix for legacy cmake + IF (${CMAKE_VERSION} GREATER 2.8.10) + string(MAKE_C_IDENTIFIER "${BIN2H_VARIABLE_NAME}" BIN2H_VARIABLE_NAME) + ENDIF() + string(TOUPPER "${BIN2H_VARIABLE_NAME}" BIN2H_VARIABLE_NAME) + + # declares byte array and the length variables + set(arrayDefinition "const unsigned char ${BIN2H_VARIABLE_NAME}[] = { ${arrayValues} };") + set(arraySizeDefinition "const size_t ${BIN2H_VARIABLE_NAME}_SIZE = ${arraySize};") + + set(declarations "${arrayDefinition}\n\n${arraySizeDefinition}\n\n") + if(BIN2H_APPEND) + file(APPEND ${BIN2H_HEADER_FILE} "${declarations}") + else() + file(WRITE ${BIN2H_HEADER_FILE} "${declarations}") + endif() +endfunction() diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/cl.hpp b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/cl.hpp new file mode 100644 index 000000000..38fac1962 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/cl.hpp @@ -0,0 +1,12452 @@ +/******************************************************************************* + * Copyright (c) 2008-2013 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/*! \file + * + * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and + * OpenCL 1.2 (rev 15) + * \author Benedict R. Gaster, Laurent Morichetti and Lee Howes + * + * Additions and fixes from: + * Brian Cole, March 3rd 2010 and April 2012 + * Matt Gruenke, April 2012. + * Bruce Merry, February 2013. + * Tom Deakin and Simon McIntosh-Smith, July 2013 + * + * \version 1.2.6 + * \date August 2013 + * + * Optional extension support + * + * cl + * cl_ext_device_fission + * #define USE_CL_DEVICE_FISSION + */ + +/*! \mainpage + * \section intro Introduction + * For many large applications C++ is the language of choice and so it seems + * reasonable to define C++ bindings for OpenCL. + * + * + * The interface is contained with a single C++ header file \em cl.hpp and all + * definitions are contained within the namespace \em cl. There is no additional + * requirement to include \em cl.h and to use either the C++ or original C + * bindings it is enough to simply include \em cl.hpp. + * + * The bindings themselves are lightweight and correspond closely to the + * underlying C API. Using the C++ bindings introduces no additional execution + * overhead. + * + * For detail documentation on the bindings see: + * + * The OpenCL C++ Wrapper API 1.2 (revision 09) + * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.2.pdf + * + * \section example Example + * + * The following example shows a general use case for the C++ + * bindings, including support for the optional exception feature and + * also the supplied vector and string classes, see following sections for + * decriptions of these features. + * + * \code + * #define __CL_ENABLE_EXCEPTIONS + * + * #if defined(__APPLE__) || defined(__MACOSX) + * #include + * #else + * #include + * #endif + * #include + * #include + * #include + * + * const char * helloStr = "__kernel void " + * "hello(void) " + * "{ " + * " " + * "} "; + * + * int + * main(void) + * { + * cl_int err = CL_SUCCESS; + * try { + * + * std::vector platforms; + * cl::Platform::get(&platforms); + * if (platforms.size() == 0) { + * std::cout << "Platform size 0\n"; + * return -1; + * } + * + * cl_context_properties properties[] = + * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; + * cl::Context context(CL_DEVICE_TYPE_CPU, properties); + * + * std::vector devices = context.getInfo(); + * + * cl::Program::Sources source(1, + * std::make_pair(helloStr,strlen(helloStr))); + * cl::Program program_ = cl::Program(context, source); + * program_.build(devices); + * + * cl::Kernel kernel(program_, "hello", &err); + * + * cl::Event event; + * cl::CommandQueue queue(context, devices[0], 0, &err); + * queue.enqueueNDRangeKernel( + * kernel, + * cl::NullRange, + * cl::NDRange(4,4), + * cl::NullRange, + * NULL, + * &event); + * + * event.wait(); + * } + * catch (cl::Error err) { + * std::cerr + * << "ERROR: " + * << err.what() + * << "(" + * << err.err() + * << ")" + * << std::endl; + * } + * + * return EXIT_SUCCESS; + * } + * + * \endcode + * + */ +#ifndef CL_HPP_ +#define CL_HPP_ + +#ifdef _WIN32 + +#include +#include +#include +#include + +#if defined(__CL_ENABLE_EXCEPTIONS) +#include +#endif // #if defined(__CL_ENABLE_EXCEPTIONS) + +#pragma push_macro("max") +#undef max +#if defined(USE_DX_INTEROP) +#include +#include +#endif +#endif // _WIN32 + +// +#if defined(USE_CL_DEVICE_FISSION) +#include +#endif + +#if defined(__APPLE__) || defined(__MACOSX) +#include +#include +#include +#else +#include +#include +#endif // !__APPLE__ + +// To avoid accidentally taking ownership of core OpenCL types +// such as cl_kernel constructors are made explicit +// under OpenCL 1.2 +#if defined(CL_VERSION_1_2) && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define __CL_EXPLICIT_CONSTRUCTORS explicit +#else // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define __CL_EXPLICIT_CONSTRUCTORS +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + +// Define deprecated prefixes and suffixes to ensure compilation +// in case they are not pre-defined +#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) +#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) +#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED) +#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) + +#if !defined(CL_CALLBACK) +#define CL_CALLBACK +#endif //CL_CALLBACK + +#include +#include + +#if !defined(__NO_STD_VECTOR) +#include +#endif + +#if !defined(__NO_STD_STRING) +#include +#endif + +#if defined(linux) || defined(__APPLE__) || defined(__MACOSX) +#include + +#include +#include +#endif // linux + +#include + + +/*! \namespace cl + * + * \brief The OpenCL C++ bindings are defined within this namespace. + * + */ +namespace cl { + +class Memory; + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +#define __INIT_CL_EXT_FCN_PTR(name) \ + if(!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddress(#name); \ + if(!pfn_##name) { \ + } \ + } +#endif // #if defined(CL_VERSION_1_1) + +#if defined(CL_VERSION_1_2) +#define __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, name) \ + if(!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddressForPlatform(platform, #name); \ + if(!pfn_##name) { \ + } \ + } +#endif // #if defined(CL_VERSION_1_1) + +class Program; +class Device; +class Context; +class CommandQueue; +class Memory; +class Buffer; + +#if defined(__CL_ENABLE_EXCEPTIONS) +/*! \brief Exception class + * + * This may be thrown by API functions when __CL_ENABLE_EXCEPTIONS is defined. + */ +class Error : public std::exception +{ +private: + cl_int err_; + const char * errStr_; +public: + /*! \brief Create a new CL error exception for a given error code + * and corresponding message. + * + * \param err error code value. + * + * \param errStr a descriptive string that must remain in scope until + * handling of the exception has concluded. If set, it + * will be returned by what(). + */ + Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) + {} + + ~Error() throw() {} + + /*! \brief Get error string associated with exception + * + * \return A memory pointer to the error message string. + */ + virtual const char * what() const throw () + { + if (errStr_ == NULL) { + return "empty"; + } + else { + return errStr_; + } + } + + /*! \brief Get error code associated with exception + * + * \return The error code. + */ + cl_int err(void) const { return err_; } +}; + +#define __ERR_STR(x) #x +#else +#define __ERR_STR(x) NULL +#endif // __CL_ENABLE_EXCEPTIONS + + +namespace detail +{ +#if defined(__CL_ENABLE_EXCEPTIONS) +static inline cl_int errHandler ( + cl_int err, + const char * errStr = NULL) +{ + if (err != CL_SUCCESS) { + throw Error(err, errStr); + } + return err; +} +#else +static inline cl_int errHandler (cl_int err, const char * errStr = NULL) +{ + (void) errStr; // suppress unused variable warning + return err; +} +#endif // __CL_ENABLE_EXCEPTIONS +} + + + +//! \cond DOXYGEN_DETAIL +#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +#define __GET_DEVICE_INFO_ERR __ERR_STR(clGetDeviceInfo) +#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) +#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) +#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) +#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) +#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) +#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) +#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) +#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) +#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) +#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) +#if defined(CL_VERSION_1_2) +#define __GET_KERNEL_ARG_INFO_ERR __ERR_STR(clGetKernelArgInfo) +#endif // #if defined(CL_VERSION_1_2) +#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) +#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) +#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) +#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) + +#define __CREATE_CONTEXT_ERR __ERR_STR(clCreateContext) +#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) +#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) + +#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) +#define __COPY_ERR __ERR_STR(cl::copy) +#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) +#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) +#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) +#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) +#if defined(CL_VERSION_1_2) +#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage) +#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture) +#define __IMAGE_DIMENSION_ERR __ERR_STR(Incorrect image dimensions) +#endif // #if defined(CL_VERSION_1_2) +#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) +#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) + +#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) +#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) +#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) +#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) + +#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) +#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) +#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) +#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) +#if defined(CL_VERSION_1_2) +#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR __ERR_STR(clCreateProgramWithBuiltInKernels) +#endif // #if defined(CL_VERSION_1_2) +#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) +#if defined(CL_VERSION_1_2) +#define __COMPILE_PROGRAM_ERR __ERR_STR(clCompileProgram) + +#endif // #if defined(CL_VERSION_1_2) +#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) + +#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) +#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) +#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) +#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) +#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) +#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) +#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) +#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) +#define __ENQUEUE_FILL_BUFFER_ERR __ERR_STR(clEnqueueFillBuffer) +#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) +#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) +#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) +#define __ENQUEUE_FILL_IMAGE_ERR __ERR_STR(clEnqueueFillImage) +#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) +#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) +#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) +#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) +#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) +#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) +#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) +#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) +#if defined(CL_VERSION_1_2) +#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR __ERR_STR(clEnqueueMigrateMemObjects) +#endif // #if defined(CL_VERSION_1_2) + +#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) +#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) + + +#define __RETAIN_ERR __ERR_STR(Retain Object) +#define __RELEASE_ERR __ERR_STR(Release Object) +#define __FLUSH_ERR __ERR_STR(clFlush) +#define __FINISH_ERR __ERR_STR(clFinish) +#define __VECTOR_CAPACITY_ERR __ERR_STR(Vector capacity error) + +/** + * CL 1.2 version that uses device fission. + */ +#if defined(CL_VERSION_1_2) +#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevices) +#else +#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) +#endif // #if defined(CL_VERSION_1_2) + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) +#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) +#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) +#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) +#define __CREATE_GL_TEXTURE_2D_ERR __ERR_STR(clCreateFromGLTexture2D) +#define __CREATE_GL_TEXTURE_3D_ERR __ERR_STR(clCreateFromGLTexture3D) +#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) +#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) +#endif // #if defined(CL_VERSION_1_1) + +#endif // __CL_USER_OVERRIDE_ERROR_STRINGS +//! \endcond + +/** + * CL 1.2 marker and barrier commands + */ +#if defined(CL_VERSION_1_2) +#define __ENQUEUE_MARKER_WAIT_LIST_ERR __ERR_STR(clEnqueueMarkerWithWaitList) +#define __ENQUEUE_BARRIER_WAIT_LIST_ERR __ERR_STR(clEnqueueBarrierWithWaitList) +#endif // #if defined(CL_VERSION_1_2) + +#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) +typedef std::string STRING_CLASS; +#elif !defined(__USE_DEV_STRING) + +/*! \class string + * \brief Simple string class, that provides a limited subset of std::string + * functionality but avoids many of the issues that come with that class. + + * \note Deprecated. Please use std::string as default or + * re-define the string class to match the std::string + * interface by defining STRING_CLASS + */ +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED string CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +{ +private: + ::size_t size_; + char * str_; +public: + //! \brief Constructs an empty string, allocating no memory. + string(void) : size_(0), str_(NULL) + { + } + + /*! \brief Constructs a string populated from an arbitrary value of + * specified size. + * + * An extra '\0' is added, in case none was contained in str. + * + * \param str the initial value of the string instance. Note that '\0' + * characters receive no special treatment. If NULL, + * the string is left empty, with a size of 0. + * + * \param size the number of characters to copy from str. + */ + string(const char * str, ::size_t size) : + size_(size), + str_(NULL) + { + if( size > 0 ) { + str_ = new char[size_+1]; + if (str_ != NULL) { + memcpy(str_, str, size_ * sizeof(char)); + str_[size_] = '\0'; + } + else { + size_ = 0; + } + } + } + + /*! \brief Constructs a string populated from a null-terminated value. + * + * \param str the null-terminated initial value of the string instance. + * If NULL, the string is left empty, with a size of 0. + */ + string(const char * str) : + size_(0), + str_(NULL) + { + if( str ) { + size_= ::strlen(str); + } + if( size_ > 0 ) { + str_ = new char[size_ + 1]; + if (str_ != NULL) { + memcpy(str_, str, (size_ + 1) * sizeof(char)); + } + } + } + + void resize( ::size_t n ) + { + if( size_ == n ) { + return; + } + if (n == 0) { + if( str_ ) { + delete [] str_; + } + str_ = NULL; + size_ = 0; + } + else { + char *newString = new char[n + 1]; + int copySize = n; + if( size_ < n ) { + copySize = size_; + } + size_ = n; + + if(str_) { + memcpy(newString, str_, (copySize + 1) * sizeof(char)); + } + if( copySize < size_ ) { + memset(newString + copySize, 0, size_ - copySize); + } + newString[size_] = '\0'; + + delete [] str_; + str_ = newString; + } + } + + const char& operator[] ( ::size_t pos ) const + { + return str_[pos]; + } + + char& operator[] ( ::size_t pos ) + { + return str_[pos]; + } + + /*! \brief Copies the value of another string to this one. + * + * \param rhs the string to copy. + * + * \returns a reference to the modified instance. + */ + string& operator=(const string& rhs) + { + if (this == &rhs) { + return *this; + } + + if( str_ != NULL ) { + delete [] str_; + str_ = NULL; + size_ = 0; + } + + if (rhs.size_ == 0 || rhs.str_ == NULL) { + str_ = NULL; + size_ = 0; + } + else { + str_ = new char[rhs.size_ + 1]; + size_ = rhs.size_; + + if (str_ != NULL) { + memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); + } + else { + size_ = 0; + } + } + + return *this; + } + + /*! \brief Constructs a string by copying the value of another instance. + * + * \param rhs the string to copy. + */ + string(const string& rhs) : + size_(0), + str_(NULL) + { + *this = rhs; + } + + //! \brief Destructor - frees memory used to hold the current value. + ~string() + { + delete[] str_; + str_ = NULL; + } + + //! \brief Queries the length of the string, excluding any added '\0's. + ::size_t size(void) const { return size_; } + + //! \brief Queries the length of the string, excluding any added '\0's. + ::size_t length(void) const { return size(); } + + /*! \brief Returns a pointer to the private copy held by this instance, + * or "" if empty/unset. + */ + const char * c_str(void) const { return (str_) ? str_ : "";} +}; +typedef cl::string STRING_CLASS; +#endif // #elif !defined(__USE_DEV_STRING) + +#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) +#define VECTOR_CLASS std::vector +#elif !defined(__USE_DEV_VECTOR) +#define VECTOR_CLASS cl::vector + +#if !defined(__MAX_DEFAULT_VECTOR_SIZE) +#define __MAX_DEFAULT_VECTOR_SIZE 10 +#endif + +/*! \class vector + * \brief Fixed sized vector implementation that mirroring + * + * \note Deprecated. Please use std::vector as default or + * re-define the vector class to match the std::vector + * interface by defining VECTOR_CLASS + + * \note Not recommended for use with custom objects as + * current implementation will construct N elements + * + * std::vector functionality. + * \brief Fixed sized vector compatible with std::vector. + * + * \note + * This differs from std::vector<> not just in memory allocation, + * but also in terms of when members are constructed, destroyed, + * and assigned instead of being copy constructed. + * + * \param T type of element contained in the vector. + * + * \param N maximum size of the vector. + */ +template +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED vector CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +{ +private: + T data_[N]; + unsigned int size_; + +public: + //! \brief Constructs an empty vector with no memory allocated. + vector() : + size_(static_cast(0)) + {} + + //! \brief Deallocates the vector's memory and destroys all of its elements. + ~vector() + { + clear(); + } + + //! \brief Returns the number of elements currently contained. + unsigned int size(void) const + { + return size_; + } + + /*! \brief Empties the vector of all elements. + * \note + * This does not deallocate memory but will invoke destructors + * on contained elements. + */ + void clear() + { + while(!empty()) { + pop_back(); + } + } + + /*! \brief Appends an element after the last valid element. + * Calling this on a vector that has reached capacity will throw an + * exception if exceptions are enabled. + */ + void push_back (const T& x) + { + if (size() < N) { + new (&data_[size_]) T(x); + size_++; + } else { + detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); + } + } + + /*! \brief Removes the last valid element from the vector. + * Calling this on an empty vector will throw an exception + * if exceptions are enabled. + */ + void pop_back(void) + { + if (size_ != 0) { + --size_; + data_[size_].~T(); + } else { + detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); + } + } + + /*! \brief Constructs with a value copied from another. + * + * \param vec the vector to copy. + */ + vector(const vector& vec) : + size_(vec.size_) + { + if (size_ != 0) { + assign(vec.begin(), vec.end()); + } + } + + /*! \brief Constructs with a specified number of initial elements. + * + * \param size number of initial elements. + * + * \param val value of initial elements. + */ + vector(unsigned int size, const T& val = T()) : + size_(0) + { + for (unsigned int i = 0; i < size; i++) { + push_back(val); + } + } + + /*! \brief Overwrites the current content with that copied from another + * instance. + * + * \param rhs vector to copy. + * + * \returns a reference to this. + */ + vector& operator=(const vector& rhs) + { + if (this == &rhs) { + return *this; + } + + if (rhs.size_ != 0) { + assign(rhs.begin(), rhs.end()); + } else { + clear(); + } + + return *this; + } + + /*! \brief Tests equality against another instance. + * + * \param vec the vector against which to compare. + */ + bool operator==(vector &vec) + { + if (size() != vec.size()) { + return false; + } + + for( unsigned int i = 0; i < size(); ++i ) { + if( operator[](i) != vec[i] ) { + return false; + } + } + return true; + } + + //! \brief Conversion operator to T*. + operator T* () { return data_; } + + //! \brief Conversion operator to const T*. + operator const T* () const { return data_; } + + //! \brief Tests whether this instance has any elements. + bool empty (void) const + { + return size_==0; + } + + //! \brief Returns the maximum number of elements this instance can hold. + unsigned int max_size (void) const + { + return N; + } + + //! \brief Returns the maximum number of elements this instance can hold. + unsigned int capacity () const + { + return N; + } + + /*! \brief Returns a reference to a given element. + * + * \param index which element to access. * + * \note + * The caller is responsible for ensuring index is >= 0 and < size(). + */ + T& operator[](int index) + { + return data_[index]; + } + + /*! \brief Returns a const reference to a given element. + * + * \param index which element to access. + * + * \note + * The caller is responsible for ensuring index is >= 0 and < size(). + */ + const T& operator[](int index) const + { + return data_[index]; + } + + /*! \brief Assigns elements of the vector based on a source iterator range. + * + * \param start Beginning iterator of source range + * \param end Enditerator of source range + * + * \note + * Will throw an exception if exceptions are enabled and size exceeded. + */ + template + void assign(I start, I end) + { + clear(); + while(start != end) { + push_back(*start); + start++; + } + } + + /*! \class iterator + * \brief Const iterator class for vectors + */ + class iterator + { + private: + const vector *vec_; + int index_; + + /** + * Internal iterator constructor to capture reference + * to the vector it iterates over rather than taking + * the vector by copy. + */ + iterator (const vector &vec, int index) : + vec_(&vec) + { + if( !vec.empty() ) { + index_ = index; + } else { + index_ = -1; + } + } + + public: + iterator(void) : + index_(-1), + vec_(NULL) + { + } + + iterator(const iterator& rhs) : + vec_(rhs.vec_), + index_(rhs.index_) + { + } + + ~iterator(void) {} + + static iterator begin(const cl::vector &vec) + { + iterator i(vec, 0); + + return i; + } + + static iterator end(const cl::vector &vec) + { + iterator i(vec, vec.size()); + + return i; + } + + bool operator==(iterator i) + { + return ((vec_ == i.vec_) && + (index_ == i.index_)); + } + + bool operator!=(iterator i) + { + return (!(*this==i)); + } + + iterator& operator++() + { + ++index_; + return *this; + } + + iterator operator++(int) + { + iterator retVal(*this); + ++index_; + return retVal; + } + + iterator& operator--() + { + --index_; + return *this; + } + + iterator operator--(int) + { + iterator retVal(*this); + --index_; + return retVal; + } + + const T& operator *() const + { + return (*vec_)[index_]; + } + }; + + iterator begin(void) + { + return iterator::begin(*this); + } + + iterator begin(void) const + { + return iterator::begin(*this); + } + + iterator end(void) + { + return iterator::end(*this); + } + + iterator end(void) const + { + return iterator::end(*this); + } + + T& front(void) + { + return data_[0]; + } + + T& back(void) + { + return data_[size_]; + } + + const T& front(void) const + { + return data_[0]; + } + + const T& back(void) const + { + return data_[size_-1]; + } +}; +#endif // #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) + + + + + +namespace detail { +#define __DEFAULT_NOT_INITIALIZED 1 +#define __DEFAULT_BEING_INITIALIZED 2 +#define __DEFAULT_INITIALIZED 4 + + /* + * Compare and exchange primitives are needed for handling of defaults + */ + inline int compare_exchange(volatile int * dest, int exchange, int comparand) + { +#ifdef _WIN32 + return (int)(InterlockedCompareExchange( + (volatile long*)dest, + (long)exchange, + (long)comparand)); +#elif defined(__APPLE__) || defined(__MACOSX) + return OSAtomicOr32Orig((uint32_t)exchange, (volatile uint32_t*)dest); +#else // !_WIN32 || defined(__APPLE__) || defined(__MACOSX) + return (__sync_val_compare_and_swap( + dest, + comparand, + exchange)); +#endif // !_WIN32 + } + + inline void fence() { _mm_mfence(); } +}; // namespace detail + + +/*! \brief class used to interface between C++ and + * OpenCL C calls that require arrays of size_t values, whose + * size is known statically. + */ +template +class size_t +{ +private: + ::size_t data_[N]; + +public: + //! \brief Initialize size_t to all 0s + size_t() + { + for( int i = 0; i < N; ++i ) { + data_[i] = 0; + } + } + + ::size_t& operator[](int index) + { + return data_[index]; + } + + const ::size_t& operator[](int index) const + { + return data_[index]; + } + + //! \brief Conversion operator to T*. + operator ::size_t* () { return data_; } + + //! \brief Conversion operator to const T*. + operator const ::size_t* () const { return data_; } +}; + +namespace detail { + +// Generic getInfoHelper. The final parameter is used to guide overload +// resolution: the actual parameter passed is an int, which makes this +// a worse conversion sequence than a specialization that declares the +// parameter as an int. +template +inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long) +{ + return f(name, sizeof(T), param, NULL); +} + +// Specialized getInfoHelper for VECTOR_CLASS params +template +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, long) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + T* value = (T*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + param->assign(&value[0], &value[required/sizeof(T)]); + return CL_SUCCESS; +} + +/* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. + */ +template +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int, typename T::cl_type = 0) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + typename T::cl_type * value = (typename T::cl_type *) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + ::size_t elements = required / sizeof(typename T::cl_type); + param->assign(&value[0], &value[elements]); + for (::size_t i = 0; i < elements; i++) + { + if (value[i] != NULL) + { + err = (*param)[i].retain(); + if (err != CL_SUCCESS) { + return err; + } + } + } + return CL_SUCCESS; +} + +// Specialized for getInfo +template +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int) +{ + cl_int err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); + + if (err != CL_SUCCESS) { + return err; + } + + return CL_SUCCESS; +} + +// Specialized GetInfoHelper for STRING_CLASS params +template +inline cl_int getInfoHelper(Func f, cl_uint name, STRING_CLASS* param, long) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + char* value = (char*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + *param = value; + return CL_SUCCESS; +} + +// Specialized GetInfoHelper for cl::size_t params +template +inline cl_int getInfoHelper(Func f, cl_uint name, size_t* param, long) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + ::size_t* value = (::size_t*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + for(int i = 0; i < N; ++i) { + (*param)[i] = value[i]; + } + + return CL_SUCCESS; +} + +template struct ReferenceHandler; + +/* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. + */ +template +inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0) +{ + typename T::cl_type value; + cl_int err = f(name, sizeof(value), &value, NULL); + if (err != CL_SUCCESS) { + return err; + } + *param = value; + if (value != NULL) + { + err = param->retain(); + if (err != CL_SUCCESS) { + return err; + } + } + return CL_SUCCESS; +} + +#define __PARAM_NAME_INFO_1_0(F) \ + F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ + \ + F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ + F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ + F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ + F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ + F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ + F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ + F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ + F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ + F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ + F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ + \ + F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ + F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ + F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ + \ + F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ + F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ + F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ + F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \ + \ + F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ + \ + F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ + F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ + F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ + F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ + F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ + \ + F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ + F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ + F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ + F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ + F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ + F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ + F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ + \ + F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ + F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ + F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ + F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ + F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ + \ + F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ + F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ + F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ + F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ + F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ + F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ + F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ + \ + F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ + \ + F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ + F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ + F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ + F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ + F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ + \ + F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ + F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ + F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ + F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) + +#if defined(CL_VERSION_1_1) +#define __PARAM_NAME_INFO_1_1(F) \ + F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \ + \ + F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ + F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ + F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ + \ + F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) +#endif // CL_VERSION_1_1 + + +#if defined(CL_VERSION_1_2) +#define __PARAM_NAME_INFO_1_2(F) \ + F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) \ + \ + F(cl_program_info, CL_PROGRAM_NUM_KERNELS, ::size_t) \ + F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, STRING_CLASS) \ + \ + F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ + \ + F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, STRING_CLASS) \ + \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, STRING_CLASS) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, STRING_CLASS) \ + \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \ + F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPE, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, ::size_t) \ + F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, STRING_CLASS) +#endif // #if defined(CL_VERSION_1_2) + +#if defined(USE_CL_DEVICE_FISSION) +#define __PARAM_NAME_DEVICE_FISSION(F) \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ + F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) +#endif // USE_CL_DEVICE_FISSION + +template +struct param_traits {}; + +#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \ +struct token; \ +template<> \ +struct param_traits \ +{ \ + enum { value = param_name }; \ + typedef T param_type; \ +}; + +__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS) +#if defined(CL_VERSION_1_1) +__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS) +#endif // CL_VERSION_1_1 +#if defined(CL_VERSION_1_2) +__PARAM_NAME_INFO_1_2(__CL_DECLARE_PARAM_TRAITS) +#endif // CL_VERSION_1_1 + +#if defined(USE_CL_DEVICE_FISSION) +__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS); +#endif // USE_CL_DEVICE_FISSION + +#ifdef CL_PLATFORM_ICD_SUFFIX_KHR +__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS) +#endif + +#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) +#endif + +#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, VECTOR_CLASS< ::size_t>) +#endif +#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_SIMD_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) +#endif + +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) +#endif +#ifdef CL_DEVICE_WARP_SIZE_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) +#endif +#ifdef CL_DEVICE_GPU_OVERLAP_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) +#endif +#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) +#endif +#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) +#endif + +// Convenience functions + +template +inline cl_int +getInfo(Func f, cl_uint name, T* param) +{ + return getInfoHelper(f, name, param, 0); +} + +template +struct GetInfoFunctor0 +{ + Func f_; const Arg0& arg0_; + cl_int operator ()( + cl_uint param, ::size_t size, void* value, ::size_t* size_ret) + { return f_(arg0_, param, size, value, size_ret); } +}; + +template +struct GetInfoFunctor1 +{ + Func f_; const Arg0& arg0_; const Arg1& arg1_; + cl_int operator ()( + cl_uint param, ::size_t size, void* value, ::size_t* size_ret) + { return f_(arg0_, arg1_, param, size, value, size_ret); } +}; + +template +inline cl_int +getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) +{ + GetInfoFunctor0 f0 = { f, arg0 }; + return getInfoHelper(f0, name, param, 0); +} + +template +inline cl_int +getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) +{ + GetInfoFunctor1 f0 = { f, arg0, arg1 }; + return getInfoHelper(f0, name, param, 0); +} + +template +struct ReferenceHandler +{ }; + +#if defined(CL_VERSION_1_2) +/** + * OpenCL 1.2 devices do have retain/release. + */ +template <> +struct ReferenceHandler +{ + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int retain(cl_device_id device) + { return ::clRetainDevice(device); } + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int release(cl_device_id device) + { return ::clReleaseDevice(device); } +}; +#else // #if defined(CL_VERSION_1_2) +/** + * OpenCL 1.1 devices do not have retain/release. + */ +template <> +struct ReferenceHandler +{ + // cl_device_id does not have retain(). + static cl_int retain(cl_device_id) + { return CL_SUCCESS; } + // cl_device_id does not have release(). + static cl_int release(cl_device_id) + { return CL_SUCCESS; } +}; +#endif // #if defined(CL_VERSION_1_2) + +template <> +struct ReferenceHandler +{ + // cl_platform_id does not have retain(). + static cl_int retain(cl_platform_id) + { return CL_SUCCESS; } + // cl_platform_id does not have release(). + static cl_int release(cl_platform_id) + { return CL_SUCCESS; } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_context context) + { return ::clRetainContext(context); } + static cl_int release(cl_context context) + { return ::clReleaseContext(context); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_command_queue queue) + { return ::clRetainCommandQueue(queue); } + static cl_int release(cl_command_queue queue) + { return ::clReleaseCommandQueue(queue); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_mem memory) + { return ::clRetainMemObject(memory); } + static cl_int release(cl_mem memory) + { return ::clReleaseMemObject(memory); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_sampler sampler) + { return ::clRetainSampler(sampler); } + static cl_int release(cl_sampler sampler) + { return ::clReleaseSampler(sampler); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_program program) + { return ::clRetainProgram(program); } + static cl_int release(cl_program program) + { return ::clReleaseProgram(program); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_kernel kernel) + { return ::clRetainKernel(kernel); } + static cl_int release(cl_kernel kernel) + { return ::clReleaseKernel(kernel); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_event event) + { return ::clRetainEvent(event); } + static cl_int release(cl_event event) + { return ::clReleaseEvent(event); } +}; + + +// Extracts version number with major in the upper 16 bits, minor in the lower 16 +static cl_uint getVersion(const char *versionInfo) +{ + int highVersion = 0; + int lowVersion = 0; + int index = 7; + while(versionInfo[index] != '.' ) { + highVersion *= 10; + highVersion += versionInfo[index]-'0'; + ++index; + } + ++index; + while(versionInfo[index] != ' ' ) { + lowVersion *= 10; + lowVersion += versionInfo[index]-'0'; + ++index; + } + return (highVersion << 16) | lowVersion; +} + +static cl_uint getPlatformVersion(cl_platform_id platform) +{ + ::size_t size = 0; + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size); + char *versionInfo = (char *) alloca(size); + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, &versionInfo[0], &size); + return getVersion(versionInfo); +} + +static cl_uint getDevicePlatformVersion(cl_device_id device) +{ + cl_platform_id platform; + clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); + return getPlatformVersion(platform); +} + +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +static cl_uint getContextPlatformVersion(cl_context context) +{ + // The platform cannot be queried directly, so we first have to grab a + // device and obtain its context + ::size_t size = 0; + clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); + if (size == 0) + return 0; + cl_device_id *devices = (cl_device_id *) alloca(size); + clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices, NULL); + return getDevicePlatformVersion(devices[0]); +} +#endif // #if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + +template +class Wrapper +{ +public: + typedef T cl_type; + +protected: + cl_type object_; + +public: + Wrapper() : object_(NULL) { } + + Wrapper(const cl_type &obj) : object_(obj) { } + + ~Wrapper() + { + if (object_ != NULL) { release(); } + } + + Wrapper(const Wrapper& rhs) + { + object_ = rhs.object_; + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + } + + Wrapper& operator = (const Wrapper& rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs.object_; + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + return *this; + } + + Wrapper& operator = (const cl_type &rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs; + return *this; + } + + cl_type operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } + +protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); + + cl_int retain() const + { + return ReferenceHandler::retain(object_); + } + + cl_int release() const + { + return ReferenceHandler::release(object_); + } +}; + +template <> +class Wrapper +{ +public: + typedef cl_device_id cl_type; + +protected: + cl_type object_; + bool referenceCountable_; + + static bool isReferenceCountable(cl_device_id device) + { + bool retVal = false; + if (device != NULL) { + int version = getDevicePlatformVersion(device); + if(version > ((1 << 16) + 1)) { + retVal = true; + } + } + return retVal; + } + +public: + Wrapper() : object_(NULL), referenceCountable_(false) + { + } + + Wrapper(const cl_type &obj) : object_(obj), referenceCountable_(false) + { + referenceCountable_ = isReferenceCountable(obj); + } + + ~Wrapper() + { + if (object_ != NULL) { release(); } + } + + Wrapper(const Wrapper& rhs) + { + object_ = rhs.object_; + referenceCountable_ = isReferenceCountable(object_); + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + } + + Wrapper& operator = (const Wrapper& rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + return *this; + } + + Wrapper& operator = (const cl_type &rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs; + referenceCountable_ = isReferenceCountable(object_); + return *this; + } + + cl_type operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } + +protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); + + template + friend inline cl_int getInfoHelper(Func, cl_uint, VECTOR_CLASS*, int, typename U::cl_type); + + cl_int retain() const + { + if( referenceCountable_ ) { + return ReferenceHandler::retain(object_); + } + else { + return CL_SUCCESS; + } + } + + cl_int release() const + { + if( referenceCountable_ ) { + return ReferenceHandler::release(object_); + } + else { + return CL_SUCCESS; + } + } +}; + +} // namespace detail +//! \endcond + +/*! \stuct ImageFormat + * \brief Adds constructors and member functions for cl_image_format. + * + * \see cl_image_format + */ +struct ImageFormat : public cl_image_format +{ + //! \brief Default constructor - performs no initialization. + ImageFormat(){} + + //! \brief Initializing constructor. + ImageFormat(cl_channel_order order, cl_channel_type type) + { + image_channel_order = order; + image_channel_data_type = type; + } + + //! \brief Assignment operator. + ImageFormat& operator = (const ImageFormat& rhs) + { + if (this != &rhs) { + this->image_channel_data_type = rhs.image_channel_data_type; + this->image_channel_order = rhs.image_channel_order; + } + return *this; + } +}; + +/*! \brief Class interface for cl_device_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_device_id + */ +class Device : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Device() : detail::Wrapper() { } + + /*! \brief Copy constructor. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device(const Device& device) : detail::Wrapper(device) { } + + /*! \brief Constructor from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device(const cl_device_id &device) : detail::Wrapper(device) { } + + /*! \brief Returns the first device on the default context. + * + * \see Context::getDefault() + */ + static Device getDefault(cl_int * err = NULL); + + /*! \brief Assignment operator from Device. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device& operator = (const Device& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device& operator = (const cl_device_id& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetDeviceInfo(). + template + cl_int getInfo(cl_device_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetDeviceInfo, object_, name, param), + __GET_DEVICE_INFO_ERR); + } + + //! \brief Wrapper for clGetDeviceInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_device_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /** + * CL 1.2 version + */ +#if defined(CL_VERSION_1_2) + //! \brief Wrapper for clCreateSubDevicesEXT(). + cl_int createSubDevices( + const cl_device_partition_property * properties, + VECTOR_CLASS* devices) + { + cl_uint n = 0; + cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = clCreateSubDevices(object_, properties, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif // #if defined(CL_VERSION_1_2) + +/** + * CL 1.1 version that uses device fission. + */ +#if defined(CL_VERSION_1_1) +#if defined(USE_CL_DEVICE_FISSION) + cl_int createSubDevices( + const cl_device_partition_property_ext * properties, + VECTOR_CLASS* devices) + { + typedef CL_API_ENTRY cl_int + ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( + cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; + __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); + + cl_uint n = 0; + cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif // #if defined(USE_CL_DEVICE_FISSION) +#endif // #if defined(CL_VERSION_1_1) +}; + +/*! \brief Class interface for cl_platform_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_platform_id + */ +class Platform : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Platform() : detail::Wrapper() { } + + /*! \brief Copy constructor. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform(const Platform& platform) : detail::Wrapper(platform) { } + + /*! \brief Constructor from cl_platform_id. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform(const cl_platform_id &platform) : detail::Wrapper(platform) { } + + /*! \brief Assignment operator from Platform. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform& operator = (const Platform& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_platform_id. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform& operator = (const cl_platform_id& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetPlatformInfo(). + cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetPlatformInfo, object_, name, param), + __GET_PLATFORM_INFO_ERR); + } + + //! \brief Wrapper for clGetPlatformInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_platform_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Gets a list of devices for this platform. + * + * Wraps clGetDeviceIDs(). + */ + cl_int getDevices( + cl_device_type type, + VECTOR_CLASS* devices) const + { + cl_uint n = 0; + if( devices == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = ::clGetDeviceIDs(object_, type, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } + +#if defined(USE_DX_INTEROP) + /*! \brief Get the list of available D3D10 devices. + * + * \param d3d_device_source. + * + * \param d3d_object. + * + * \param d3d_device_set. + * + * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device + * values returned in devices can be used to identify a specific OpenCL + * device. If \a devices argument is NULL, this argument is ignored. + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully. + * + * The application can query specific capabilities of the OpenCL device(s) + * returned by cl::getDevices. This can be used by the application to + * determine which device(s) to use. + * + * \note In the case that exceptions are enabled and a return value + * other than CL_SUCCESS is generated, then cl::Error exception is + * generated. + */ + cl_int getDevices( + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + VECTOR_CLASS* devices) const + { + typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( + cl_platform_id platform, + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint* num_devices); + + if( devices == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + + static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; + __INIT_CL_EXT_FCN_PTR_PLATFORM(object_, clGetDeviceIDsFromD3D10KHR); + + cl_uint n = 0; + cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + 0, + NULL, + &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + n, + ids, + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif + + /*! \brief Gets a list of available platforms. + * + * Wraps clGetPlatformIDs(). + */ + static cl_int get( + VECTOR_CLASS* platforms) + { + cl_uint n = 0; + + if( platforms == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); + } + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + platforms->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } + + /*! \brief Gets the first available platform. + * + * Wraps clGetPlatformIDs(), returning the first result. + */ + static cl_int get( + Platform * platform) + { + cl_uint n = 0; + + if( platform == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); + } + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + *platform = ids[0]; + return CL_SUCCESS; + } + + /*! \brief Gets the first available platform, returning it by value. + * + * Wraps clGetPlatformIDs(), returning the first result. + */ + static Platform get( + cl_int * errResult = NULL) + { + Platform platform; + cl_uint n = 0; + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + if (errResult != NULL) { + *errResult = err; + } + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + + if (err != CL_SUCCESS) { + detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + if (errResult != NULL) { + *errResult = err; + } + + return ids[0]; + } + + static Platform getDefault( + cl_int *errResult = NULL ) + { + return get(errResult); + } + + +#if defined(CL_VERSION_1_2) + //! \brief Wrapper for clUnloadCompiler(). + cl_int + unloadCompiler() + { + return ::clUnloadPlatformCompiler(object_); + } +#endif // #if defined(CL_VERSION_1_2) +}; // class Platform + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +/** + * Unload the OpenCL compiler. + * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. + */ +inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int +UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +inline cl_int +UnloadCompiler() +{ + return ::clUnloadCompiler(); +} +#endif // #if defined(CL_VERSION_1_1) + +/*! \brief Class interface for cl_context. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_context as the original. For details, see + * clRetainContext() and clReleaseContext(). + * + * \see cl_context + */ +class Context + : public detail::Wrapper +{ +private: + static volatile int default_initialized_; + static Context default_; + static volatile cl_int default_error_; +public: + /*! \brief Destructor. + * + * This calls clReleaseContext() on the value held by this instance. + */ + ~Context() { } + + /*! \brief Constructs a context including a list of specified devices. + * + * Wraps clCreateContext(). + */ + Context( + const VECTOR_CLASS& devices, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateContext( + properties, (cl_uint) numDevices, + deviceIDs, + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != NULL) { + *err = error; + } + } + + Context( + const Device& device, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + + cl_device_id deviceID = device(); + + object_ = ::clCreateContext( + properties, 1, + &deviceID, + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a context including all or a subset of devices of a specified type. + * + * Wraps clCreateContextFromType(). + */ + Context( + cl_device_type type, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + +#if !defined(__APPLE__) || !defined(__MACOS) + cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 }; + + if (properties == NULL) { + // Get a valid platform ID as we cannot send in a blank one + VECTOR_CLASS platforms; + error = Platform::get(&platforms); + if (error != CL_SUCCESS) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + return; + } + + // Check the platforms we found for a device of our specified type + cl_context_properties platform_id = 0; + for (unsigned int i = 0; i < platforms.size(); i++) { + + VECTOR_CLASS devices; + +#if defined(__CL_ENABLE_EXCEPTIONS) + try { +#endif + + error = platforms[i].getDevices(type, &devices); + +#if defined(__CL_ENABLE_EXCEPTIONS) + } catch (Error) {} + // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type + // We do error checking next anyway, and can throw there if needed +#endif + + // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND + if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + if (devices.size() > 0) { + platform_id = (cl_context_properties)platforms[i](); + break; + } + } + + if (platform_id == 0) { + detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = CL_DEVICE_NOT_FOUND; + } + return; + } + + prop[1] = platform_id; + properties = &prop[0]; + } +#endif + object_ = ::clCreateContextFromType( + properties, type, notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. + * + * \note All calls to this function return the same cl_context as the first. + */ + static Context getDefault(cl_int * err = NULL) + { + int state = detail::compare_exchange( + &default_initialized_, + __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); + + if (state & __DEFAULT_INITIALIZED) { + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + if (state & __DEFAULT_BEING_INITIALIZED) { + // Assume writes will propagate eventually... + while(default_initialized_ != __DEFAULT_INITIALIZED) { + detail::fence(); + } + + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + cl_int error; + default_ = Context( + CL_DEVICE_TYPE_DEFAULT, + NULL, + NULL, + NULL, + &error); + + detail::fence(); + + default_error_ = error; + // Assume writes will propagate eventually... + default_initialized_ = __DEFAULT_INITIALIZED; + + detail::fence(); + + if (err != NULL) { + *err = default_error_; + } + return default_; + + } + + //! \brief Default constructor - initializes to NULL. + Context() : detail::Wrapper() { } + + /*! \brief Copy constructor. + * + * This calls clRetainContext() on the parameter's cl_context. + */ + Context(const Context& context) : detail::Wrapper(context) { } + + /*! \brief Constructor from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_context + * into the new Context object. + */ + __CL_EXPLICIT_CONSTRUCTORS Context(const cl_context& context) : detail::Wrapper(context) { } + + /*! \brief Assignment operator from Context. + * + * This calls clRetainContext() on the parameter and clReleaseContext() on + * the previous value held by this instance. + */ + Context& operator = (const Context& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseContext() on the value previously held by this instance. + */ + Context& operator = (const cl_context& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetContextInfo(). + template + cl_int getInfo(cl_context_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetContextInfo, object_, name, param), + __GET_CONTEXT_INFO_ERR); + } + + //! \brief Wrapper for clGetContextInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_context_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Gets a list of supported image formats. + * + * Wraps clGetSupportedImageFormats(). + */ + cl_int getSupportedImageFormats( + cl_mem_flags flags, + cl_mem_object_type type, + VECTOR_CLASS* formats) const + { + cl_uint numEntries; + cl_int err = ::clGetSupportedImageFormats( + object_, + flags, + type, + 0, + NULL, + &numEntries); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + ImageFormat* value = (ImageFormat*) + alloca(numEntries * sizeof(ImageFormat)); + err = ::clGetSupportedImageFormats( + object_, + flags, + type, + numEntries, + (cl_image_format*) value, + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + formats->assign(&value[0], &value[numEntries]); + return CL_SUCCESS; + } +}; + +inline Device Device::getDefault(cl_int * err) +{ + cl_int error; + Device device; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + device = context.getInfo()[0]; + if (err != NULL) { + *err = CL_SUCCESS; + } + } + + return device; +} + + +#ifdef _WIN32 +__declspec(selectany) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__declspec(selectany) Context Context::default_; +__declspec(selectany) volatile cl_int Context::default_error_ = CL_SUCCESS; +#else +__attribute__((weak)) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__attribute__((weak)) Context Context::default_; +__attribute__((weak)) volatile cl_int Context::default_error_ = CL_SUCCESS; +#endif + +/*! \brief Class interface for cl_event. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_event as the original. For details, see + * clRetainEvent() and clReleaseEvent(). + * + * \see cl_event + */ +class Event : public detail::Wrapper +{ +public: + /*! \brief Destructor. + * + * This calls clReleaseEvent() on the value held by this instance. + */ + ~Event() { } + + //! \brief Default constructor - initializes to NULL. + Event() : detail::Wrapper() { } + + /*! \brief Copy constructor. + * + * This calls clRetainEvent() on the parameter's cl_event. + */ + Event(const Event& event) : detail::Wrapper(event) { } + + /*! \brief Constructor from cl_event - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_event + * into the new Event object. + */ + Event(const cl_event& event) : detail::Wrapper(event) { } + + /*! \brief Assignment operator from cl_event - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseEvent() on the value previously held by this instance. + */ + Event& operator = (const Event& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_event. + * + * This calls clRetainEvent() on the parameter and clReleaseEvent() on + * the previous value held by this instance. + */ + Event& operator = (const cl_event& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetEventInfo(). + template + cl_int getInfo(cl_event_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetEventInfo, object_, name, param), + __GET_EVENT_INFO_ERR); + } + + //! \brief Wrapper for clGetEventInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_event_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + //! \brief Wrapper for clGetEventProfilingInfo(). + template + cl_int getProfilingInfo(cl_profiling_info name, T* param) const + { + return detail::errHandler(detail::getInfo( + &::clGetEventProfilingInfo, object_, name, param), + __GET_EVENT_PROFILE_INFO_ERR); + } + + //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. + template typename + detail::param_traits::param_type + getProfilingInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_profiling_info, name>::param_type param; + cl_int result = getProfilingInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Blocks the calling thread until this event completes. + * + * Wraps clWaitForEvents(). + */ + cl_int wait() const + { + return detail::errHandler( + ::clWaitForEvents(1, &object_), + __WAIT_FOR_EVENTS_ERR); + } + +#if defined(CL_VERSION_1_1) + /*! \brief Registers a user callback function for a specific command execution status. + * + * Wraps clSetEventCallback(). + */ + cl_int setCallback( + cl_int type, + void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetEventCallback( + object_, + type, + pfn_notify, + user_data), + __SET_EVENT_CALLBACK_ERR); + } +#endif + + /*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ + static cl_int + waitForEvents(const VECTOR_CLASS& events) + { + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (cl_event*)&events.front()), + __WAIT_FOR_EVENTS_ERR); + } +}; + +#if defined(CL_VERSION_1_1) +/*! \brief Class interface for user events (a subset of cl_event's). + * + * See Event for details about copy semantics, etc. + */ +class UserEvent : public Event +{ +public: + /*! \brief Constructs a user event on a given context. + * + * Wraps clCreateUserEvent(). + */ + UserEvent( + const Context& context, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateUserEvent( + context(), + &error); + + detail::errHandler(error, __CREATE_USER_EVENT_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + UserEvent() : Event() { } + + //! \brief Copy constructor - performs shallow copy. + UserEvent(const UserEvent& event) : Event(event) { } + + //! \brief Assignment Operator - performs shallow copy. + UserEvent& operator = (const UserEvent& rhs) + { + if (this != &rhs) { + Event::operator=(rhs); + } + return *this; + } + + /*! \brief Sets the execution status of a user event object. + * + * Wraps clSetUserEventStatus(). + */ + cl_int setStatus(cl_int status) + { + return detail::errHandler( + ::clSetUserEventStatus(object_,status), + __SET_USER_EVENT_STATUS_ERR); + } +}; +#endif + +/*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ +inline static cl_int +WaitForEvents(const VECTOR_CLASS& events) +{ + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (cl_event*)&events.front()), + __WAIT_FOR_EVENTS_ERR); +} + +/*! \brief Class interface for cl_mem. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_mem as the original. For details, see + * clRetainMemObject() and clReleaseMemObject(). + * + * \see cl_mem + */ +class Memory : public detail::Wrapper +{ +public: + + /*! \brief Destructor. + * + * This calls clReleaseMemObject() on the value held by this instance. + */ + ~Memory() {} + + //! \brief Default constructor - initializes to NULL. + Memory() : detail::Wrapper() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * This calls clRetainMemObject() on the parameter's cl_mem. + */ + Memory(const Memory& memory) : detail::Wrapper(memory) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_mem + * into the new Memory object. + */ + __CL_EXPLICIT_CONSTRUCTORS Memory(const cl_mem& memory) : detail::Wrapper(memory) { } + + /*! \brief Assignment operator from Memory. + * + * This calls clRetainMemObject() on the parameter and clReleaseMemObject() + * on the previous value held by this instance. + */ + Memory& operator = (const Memory& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_mem - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseMemObject() on the value previously held by this instance. + */ + Memory& operator = (const cl_mem& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetMemObjectInfo(). + template + cl_int getInfo(cl_mem_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetMemObjectInfo, object_, name, param), + __GET_MEM_OBJECT_INFO_ERR); + } + + //! \brief Wrapper for clGetMemObjectInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_mem_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if defined(CL_VERSION_1_1) + /*! \brief Registers a callback function to be called when the memory object + * is no longer needed. + * + * Wraps clSetMemObjectDestructorCallback(). + * + * Repeated calls to this function, for a given cl_mem value, will append + * to the list of functions called (in reverse order) when memory object's + * resources are freed and the memory object is deleted. + * + * \note + * The registered callbacks are associated with the underlying cl_mem + * value - not the Memory class instance. + */ + cl_int setDestructorCallback( + void (CL_CALLBACK * pfn_notify)(cl_mem, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetMemObjectDestructorCallback( + object_, + pfn_notify, + user_data), + __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); + } +#endif + +}; + +// Pre-declare copy functions +class Buffer; +template< typename IteratorType > +cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); +template< typename IteratorType > +cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); +template< typename IteratorType > +cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); +template< typename IteratorType > +cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); + + +/*! \brief Class interface for Buffer Memory Objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Buffer : public Memory +{ +public: + + /*! \brief Constructs a Buffer in a specified context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + */ + Buffer( + const Context& context, + cl_mem_flags flags, + ::size_t size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a Buffer in the default context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + * + * \see Context::getDefault() + */ + Buffer( + cl_mem_flags flags, + ::size_t size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(err); + + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template< typename IteratorType > + Buffer( + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr = false, + cl_int* err = NULL) + { + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + ::size_t size = sizeof(DataType)*(endIterator - startIterator); + + Context context = Context::getDefault(err); + + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if( !useHostPtr ) { + error = cl::copy(startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators using a specified context. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template< typename IteratorType > + Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, + bool readOnly, bool useHostPtr = false, cl_int* err = NULL); + + //! \brief Default constructor - initializes to NULL. + Buffer() : Memory() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Buffer(const Buffer& buffer) : Memory(buffer) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Buffer(const cl_mem& buffer) : Memory(buffer) { } + + /*! \brief Assignment from Buffer - performs shallow copy. + * + * See Memory for further details. + */ + Buffer& operator = (const Buffer& rhs) + { + if (this != &rhs) { + Memory::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Buffer& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + +#if defined(CL_VERSION_1_1) + /*! \brief Creates a new buffer object from this. + * + * Wraps clCreateSubBuffer(). + */ + Buffer createSubBuffer( + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * err = NULL) + { + Buffer result; + cl_int error; + result.object_ = ::clCreateSubBuffer( + object_, + flags, + buffer_create_type, + buffer_create_info, + &error); + + detail::errHandler(error, __CREATE_SUBBUFFER_ERR); + if (err != NULL) { + *err = error; + } + + return result; + } +#endif +}; + +#if defined (USE_DX_INTEROP) +/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. + * + * This is provided to facilitate interoperability with Direct3D. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferD3D10 : public Buffer +{ +public: + typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, + cl_int* errcode_ret); + + /*! \brief Constructs a BufferD3D10, in a specified context, from a + * given ID3D10Buffer. + * + * Wraps clCreateFromD3D10BufferKHR(). + */ + BufferD3D10( + const Context& context, + cl_mem_flags flags, + ID3D10Buffer* bufobj, + cl_int * err = NULL) + { + static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; + +#if defined(CL_VERSION_1_2) + vector props = context.getInfo(); + cl_platform platform = -1; + for( int i = 0; i < props.size(); ++i ) { + if( props[i] == CL_CONTEXT_PLATFORM ) { + platform = props[i+1]; + } + } + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR); +#endif +#if defined(CL_VERSION_1_1) + __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); +#endif + + cl_int error; + object_ = pfn_clCreateFromD3D10BufferKHR( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferD3D10() : Buffer() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { } + + /*! \brief Assignment from BufferD3D10 - performs shallow copy. + * + * See Memory for further details. + */ + BufferD3D10& operator = (const BufferD3D10& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferD3D10& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } +}; +#endif + +/*! \brief Class interface for GL Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferGL : public Buffer +{ +public: + /*! \brief Constructs a BufferGL in a specified context, from a given + * GL buffer. + * + * Wraps clCreateFromGLBuffer(). + */ + BufferGL( + const Context& context, + cl_mem_flags flags, + GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLBuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferGL() : Buffer() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + BufferGL(const BufferGL& buffer) : Buffer(buffer) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { } + + /*! \brief Assignment from BufferGL - performs shallow copy. + * + * See Memory for further details. + */ + BufferGL& operator = (const BufferGL& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo( + cl_gl_object_type *type, + GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \brief Class interface for GL Render Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferRenderGL : public Buffer +{ +public: + /*! \brief Constructs a BufferRenderGL in a specified context, from a given + * GL Renderbuffer. + * + * Wraps clCreateFromGLRenderbuffer(). + */ + BufferRenderGL( + const Context& context, + cl_mem_flags flags, + GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLRenderbuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferRenderGL() : Buffer() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Buffer(buffer) { } + + /*! \brief Assignment from BufferGL - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL& operator = (const BufferRenderGL& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo( + cl_gl_object_type *type, + GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \brief C++ base class for Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image : public Memory +{ +protected: + //! \brief Default constructor - initializes to NULL. + Image() : Memory() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image(const Image& image) : Memory(image) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image(const cl_mem& image) : Memory(image) { } + + /*! \brief Assignment from Image - performs shallow copy. + * + * See Memory for further details. + */ + Image& operator = (const Image& rhs) + { + if (this != &rhs) { + Memory::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + +public: + //! \brief Wrapper for clGetImageInfo(). + template + cl_int getImageInfo(cl_image_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetImageInfo, object_, name, param), + __GET_IMAGE_INFO_ERR); + } + + //! \brief Wrapper for clGetImageInfo() that returns by value. + template typename + detail::param_traits::param_type + getImageInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_image_info, name>::param_type param; + cl_int result = getImageInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +#if defined(CL_VERSION_1_2) +/*! \brief Class interface for 1D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image1D : public Image +{ +public: + /*! \brief Constructs a 1D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image1D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D, + width, + 0, 0, 0, 0, 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Image1D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image1D(const Image1D& image1D) : Image(image1D) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image1D(const cl_mem& image1D) : Image(image1D) { } + + /*! \brief Assignment from Image1D - performs shallow copy. + * + * See Memory for further details. + */ + Image1D& operator = (const Image1D& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image1D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + +/*! \class Image1DBuffer + * \brief Image interface for 1D buffer images. + */ +class Image1DBuffer : public Image +{ +public: + Image1DBuffer( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + const Buffer &buffer, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D_BUFFER, + width, + 0, 0, 0, 0, 0, 0, 0, + buffer() + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + NULL, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image1DBuffer() { } + + Image1DBuffer(const Image1DBuffer& image1D) : Image(image1D) { } + + __CL_EXPLICIT_CONSTRUCTORS Image1DBuffer(const cl_mem& image1D) : Image(image1D) { } + + Image1DBuffer& operator = (const Image1DBuffer& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + Image1DBuffer& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + +/*! \class Image1DArray + * \brief Image interface for arrays of 1D images. + */ +class Image1DArray : public Image +{ +public: + Image1DArray( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t arraySize, + ::size_t width, + ::size_t rowPitch, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D_ARRAY, + width, + 0, 0, // height, depth (unused) + arraySize, + rowPitch, + 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image1DArray() { } + + Image1DArray(const Image1DArray& imageArray) : Image(imageArray) { } + + __CL_EXPLICIT_CONSTRUCTORS Image1DArray(const cl_mem& imageArray) : Image(imageArray) { } + + Image1DArray& operator = (const Image1DArray& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + Image1DArray& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; +#endif // #if defined(CL_VERSION_1_2) + + +/*! \brief Class interface for 2D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image2D : public Image +{ +public: + /*! \brief Constructs a 1D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image2D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + ::size_t height, + ::size_t row_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + bool useCreateImage; + +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above + } +#elif defined(CL_VERSION_1_2) + useCreateImage = true; +#else + useCreateImage = false; +#endif + +#if defined(CL_VERSION_1_2) + if (useCreateImage) + { + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D, + width, + height, + 0, 0, // depth, array size (unused) + row_pitch, + 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) +#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + if (!useCreateImage) + { + object_ = ::clCreateImage2D( + context(), flags,&format, width, height, row_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE2D_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + } + + //! \brief Default constructor - initializes to NULL. + Image2D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image2D(const Image2D& image2D) : Image(image2D) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image2D(const cl_mem& image2D) : Image(image2D) { } + + /*! \brief Assignment from Image2D - performs shallow copy. + * + * See Memory for further details. + */ + Image2D& operator = (const Image2D& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image2D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + + +#if !defined(CL_VERSION_1_2) +/*! \brief Class interface for GL 2D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. + */ +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D +{ +public: + /*! \brief Constructs an Image2DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture2D(). + */ + Image2DGL( + const Context& context, + cl_mem_flags flags, + GLenum target, + GLint miplevel, + GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture2D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); + if (err != NULL) { + *err = error; + } + + } + + //! \brief Default constructor - initializes to NULL. + Image2DGL() : Image2D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image2DGL(const Image2DGL& image) : Image2D(image) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { } + + /*! \brief Assignment from Image2DGL - performs shallow copy. + * + * See Memory for further details. + */ + Image2DGL& operator = (const Image2DGL& rhs) + { + if (this != &rhs) { + Image2D::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image2DGL& operator = (const cl_mem& rhs) + { + Image2D::operator=(rhs); + return *this; + } +}; +#endif // #if !defined(CL_VERSION_1_2) + +#if defined(CL_VERSION_1_2) +/*! \class Image2DArray + * \brief Image interface for arrays of 2D images. + */ +class Image2DArray : public Image +{ +public: + Image2DArray( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t arraySize, + ::size_t width, + ::size_t height, + ::size_t rowPitch, + ::size_t slicePitch, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D_ARRAY, + width, + height, + 0, // depth (unused) + arraySize, + rowPitch, + slicePitch, + 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image2DArray() { } + + Image2DArray(const Image2DArray& imageArray) : Image(imageArray) { } + + __CL_EXPLICIT_CONSTRUCTORS Image2DArray(const cl_mem& imageArray) : Image(imageArray) { } + + Image2DArray& operator = (const Image2DArray& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + Image2DArray& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; +#endif // #if defined(CL_VERSION_1_2) + +/*! \brief Class interface for 3D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image3D : public Image +{ +public: + /*! \brief Constructs a 3D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image3D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + ::size_t height, + ::size_t depth, + ::size_t row_pitch = 0, + ::size_t slice_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + bool useCreateImage; + +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above + } +#elif defined(CL_VERSION_1_2) + useCreateImage = true; +#else + useCreateImage = false; +#endif + +#if defined(CL_VERSION_1_2) + if (useCreateImage) + { + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE3D, + width, + height, + depth, + 0, // array size (unused) + row_pitch, + slice_pitch, + 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) +#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + if (!useCreateImage) + { + object_ = ::clCreateImage3D( + context(), flags, &format, width, height, depth, row_pitch, + slice_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE3D_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + } + + //! \brief Default constructor - initializes to NULL. + Image3D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image3D(const Image3D& image3D) : Image(image3D) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image3D(const cl_mem& image3D) : Image(image3D) { } + + /*! \brief Assignment from Image3D - performs shallow copy. + * + * See Memory for further details. + */ + Image3D& operator = (const Image3D& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image3D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + +#if !defined(CL_VERSION_1_2) +/*! \brief Class interface for GL 3D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image3DGL : public Image3D +{ +public: + /*! \brief Constructs an Image3DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture3D(). + */ + Image3DGL( + const Context& context, + cl_mem_flags flags, + GLenum target, + GLint miplevel, + GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture3D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Image3DGL() : Image3D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image3DGL(const Image3DGL& image) : Image3D(image) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { } + + /*! \brief Assignment from Image3DGL - performs shallow copy. + * + * See Memory for further details. + */ + Image3DGL& operator = (const Image3DGL& rhs) + { + if (this != &rhs) { + Image3D::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image3DGL& operator = (const cl_mem& rhs) + { + Image3D::operator=(rhs); + return *this; + } +}; +#endif // #if !defined(CL_VERSION_1_2) + +#if defined(CL_VERSION_1_2) +/*! \class ImageGL + * \brief general image interface for GL interop. + * We abstract the 2D and 3D GL images into a single instance here + * that wraps all GL sourced images on the grounds that setup information + * was performed by OpenCL anyway. + */ +class ImageGL : public Image +{ +public: + ImageGL( + const Context& context, + cl_mem_flags flags, + GLenum target, + GLint miplevel, + GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); + if (err != NULL) { + *err = error; + } + } + + ImageGL() : Image() { } + + ImageGL(const ImageGL& image) : Image(image) { } + + __CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { } + + ImageGL& operator = (const ImageGL& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + ImageGL& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; +#endif // #if defined(CL_VERSION_1_2) + +/*! \brief Class interface for cl_sampler. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_sampler as the original. For details, see + * clRetainSampler() and clReleaseSampler(). + * + * \see cl_sampler + */ +class Sampler : public detail::Wrapper +{ +public: + /*! \brief Destructor. + * + * This calls clReleaseSampler() on the value held by this instance. + */ + ~Sampler() { } + + //! \brief Default constructor - initializes to NULL. + Sampler() { } + + /*! \brief Constructs a Sampler in a specified context. + * + * Wraps clCreateSampler(). + */ + Sampler( + const Context& context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateSampler( + context(), + normalized_coords, + addressing_mode, + filter_mode, + &error); + + detail::errHandler(error, __CREATE_SAMPLER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Copy constructor - performs shallow copy. + * + * This calls clRetainSampler() on the parameter's cl_sampler. + */ + Sampler(const Sampler& sampler) : detail::Wrapper(sampler) { } + + /*! \brief Constructor from cl_sampler - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_sampler + * into the new Sampler object. + */ + Sampler(const cl_sampler& sampler) : detail::Wrapper(sampler) { } + + /*! \brief Assignment operator from Sampler. + * + * This calls clRetainSampler() on the parameter and clReleaseSampler() + * on the previous value held by this instance. + */ + Sampler& operator = (const Sampler& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_sampler - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseSampler() on the value previously held by this instance. + */ + Sampler& operator = (const cl_sampler& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetSamplerInfo(). + template + cl_int getInfo(cl_sampler_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetSamplerInfo, object_, name, param), + __GET_SAMPLER_INFO_ERR); + } + + //! \brief Wrapper for clGetSamplerInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_sampler_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +class Program; +class CommandQueue; +class Kernel; + +//! \brief Class interface for specifying NDRange values. +class NDRange +{ +private: + size_t<3> sizes_; + cl_uint dimensions_; + +public: + //! \brief Default constructor - resulting range has zero dimensions. + NDRange() + : dimensions_(0) + { } + + //! \brief Constructs one-dimensional range. + NDRange(::size_t size0) + : dimensions_(1) + { + sizes_[0] = size0; + } + + //! \brief Constructs two-dimensional range. + NDRange(::size_t size0, ::size_t size1) + : dimensions_(2) + { + sizes_[0] = size0; + sizes_[1] = size1; + } + + //! \brief Constructs three-dimensional range. + NDRange(::size_t size0, ::size_t size1, ::size_t size2) + : dimensions_(3) + { + sizes_[0] = size0; + sizes_[1] = size1; + sizes_[2] = size2; + } + + /*! \brief Conversion operator to const ::size_t *. + * + * \returns a pointer to the size of the first dimension. + */ + operator const ::size_t*() const { + return (const ::size_t*) sizes_; + } + + //! \brief Queries the number of dimensions in the range. + ::size_t dimensions() const { return dimensions_; } +}; + +//! \brief A zero-dimensional range. +static const NDRange NullRange; + +//! \brief Local address wrapper for use with Kernel::setArg +struct LocalSpaceArg +{ + ::size_t size_; +}; + +namespace detail { + +template +struct KernelArgumentHandler +{ + static ::size_t size(const T&) { return sizeof(T); } + static T* ptr(T& value) { return &value; } +}; + +template <> +struct KernelArgumentHandler +{ + static ::size_t size(const LocalSpaceArg& value) { return value.size_; } + static void* ptr(LocalSpaceArg&) { return NULL; } +}; + +} +//! \endcond + +/*! __local + * \brief Helper function for generating LocalSpaceArg objects. + * Deprecated. Replaced with Local. + */ +inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED LocalSpaceArg +__local(::size_t size) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +inline LocalSpaceArg +__local(::size_t size) +{ + LocalSpaceArg ret = { size }; + return ret; +} + +/*! Local + * \brief Helper function for generating LocalSpaceArg objects. + */ +inline LocalSpaceArg +Local(::size_t size) +{ + LocalSpaceArg ret = { size }; + return ret; +} + +//class KernelFunctor; + +/*! \brief Class interface for cl_kernel. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_kernel as the original. For details, see + * clRetainKernel() and clReleaseKernel(). + * + * \see cl_kernel + */ +class Kernel : public detail::Wrapper +{ +public: + inline Kernel(const Program& program, const char* name, cl_int* err = NULL); + + /*! \brief Destructor. + * + * This calls clReleaseKernel() on the value held by this instance. + */ + ~Kernel() { } + + //! \brief Default constructor - initializes to NULL. + Kernel() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * This calls clRetainKernel() on the parameter's cl_kernel. + */ + Kernel(const Kernel& kernel) : detail::Wrapper(kernel) { } + + /*! \brief Constructor from cl_kernel - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_kernel + * into the new Kernel object. + */ + __CL_EXPLICIT_CONSTRUCTORS Kernel(const cl_kernel& kernel) : detail::Wrapper(kernel) { } + + /*! \brief Assignment operator from Kernel. + * + * This calls clRetainKernel() on the parameter and clReleaseKernel() + * on the previous value held by this instance. + */ + Kernel& operator = (const Kernel& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_kernel - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseKernel() on the value previously held by this instance. + */ + Kernel& operator = (const cl_kernel& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + template + cl_int getInfo(cl_kernel_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelInfo, object_, name, param), + __GET_KERNEL_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if defined(CL_VERSION_1_2) + template + cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), + __GET_KERNEL_ARG_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getArgInfo(cl_uint argIndex, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_arg_info, name>::param_type param; + cl_int result = getArgInfo(argIndex, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +#endif // #if defined(CL_VERSION_1_2) + + template + cl_int getWorkGroupInfo( + const Device& device, cl_kernel_work_group_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetKernelWorkGroupInfo, object_, device(), name, param), + __GET_KERNEL_WORK_GROUP_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getWorkGroupInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_work_group_info, name>::param_type param; + cl_int result = getWorkGroupInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template + cl_int setArg(cl_uint index, T value) + { + return detail::errHandler( + ::clSetKernelArg( + object_, + index, + detail::KernelArgumentHandler::size(value), + detail::KernelArgumentHandler::ptr(value)), + __SET_KERNEL_ARGS_ERR); + } + + cl_int setArg(cl_uint index, ::size_t size, void* argPtr) + { + return detail::errHandler( + ::clSetKernelArg(object_, index, size, argPtr), + __SET_KERNEL_ARGS_ERR); + } +}; + +/*! \class Program + * \brief Program interface that implements cl_program. + */ +class Program : public detail::Wrapper +{ +public: + typedef VECTOR_CLASS > Binaries; + typedef VECTOR_CLASS > Sources; + + Program( + const STRING_CLASS& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const ::size_t length = source.size(); + + Context context = Context::getDefault(err); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, + "", + NULL, + NULL); + + detail::errHandler(error, __BUILD_PROGRAM_ERR); + } + + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const STRING_CLASS& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const ::size_t length = source.size(); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, + "", + NULL, + NULL); + + detail::errHandler(error, __BUILD_PROGRAM_ERR); + } + + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const Sources& sources, + cl_int* err = NULL) + { + cl_int error; + + const ::size_t n = (::size_t)sources.size(); + ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); + const char** strings = (const char**) alloca(n * sizeof(const char*)); + + for (::size_t i = 0; i < n; ++i) { + strings[i] = sources[(int)i].first; + lengths[i] = sources[(int)i].second; + } + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)n, strings, lengths, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + if (err != NULL) { + *err = error; + } + } + + /** + * Construct a program object from a list of devices and a per-device list of binaries. + * \param context A valid OpenCL context in which to construct the program. + * \param devices A vector of OpenCL device objects for which the program will be created. + * \param binaries A vector of pairs of a pointer to a binary object and its length. + * \param binaryStatus An optional vector that on completion will be resized to + * match the size of binaries and filled with values to specify if each binary + * was successfully loaded. + * Set to CL_SUCCESS if the binary was successfully loaded. + * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL. + * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. + * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors: + * CL_INVALID_CONTEXT if context is not a valid context. + * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; + * or if any entry in binaries is NULL or has length 0. + * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context. + * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device. + * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. + */ + Program( + const Context& context, + const VECTOR_CLASS& devices, + const Binaries& binaries, + VECTOR_CLASS* binaryStatus = NULL, + cl_int* err = NULL) + { + cl_int error; + + const ::size_t numDevices = devices.size(); + + // Catch size mismatch early and return + if(binaries.size() != numDevices) { + error = CL_INVALID_VALUE; + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + return; + } + + ::size_t* lengths = (::size_t*) alloca(numDevices * sizeof(::size_t)); + const unsigned char** images = (const unsigned char**) alloca(numDevices * sizeof(const unsigned char**)); + + for (::size_t i = 0; i < numDevices; ++i) { + images[i] = (const unsigned char*)binaries[i].first; + lengths[i] = binaries[(int)i].second; + } + + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + if(binaryStatus) { + binaryStatus->resize(numDevices); + } + + object_ = ::clCreateProgramWithBinary( + context(), (cl_uint) devices.size(), + deviceIDs, + lengths, images, binaryStatus != NULL + ? &binaryStatus->front() + : NULL, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + } + + +#if defined(CL_VERSION_1_2) + /** + * Create program using builtin kernels. + * \param kernelNames Semi-colon separated list of builtin kernel names + */ + Program( + const Context& context, + const VECTOR_CLASS& devices, + const STRING_CLASS& kernelNames, + cl_int* err = NULL) + { + cl_int error; + + + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateProgramWithBuiltInKernels( + context(), + (cl_uint) devices.size(), + deviceIDs, + kernelNames.c_str(), + &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) + + Program() { } + + Program(const Program& program) : detail::Wrapper(program) { } + + __CL_EXPLICIT_CONSTRUCTORS Program(const cl_program& program) : detail::Wrapper(program) { } + + Program& operator = (const Program& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + Program& operator = (const cl_program& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + cl_int build( + const VECTOR_CLASS& devices, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + return detail::errHandler( + ::clBuildProgram( + object_, + (cl_uint) + devices.size(), + deviceIDs, + options, + notifyFptr, + data), + __BUILD_PROGRAM_ERR); + } + + cl_int build( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + return detail::errHandler( + ::clBuildProgram( + object_, + 0, + NULL, + options, + notifyFptr, + data), + __BUILD_PROGRAM_ERR); + } + +#if defined(CL_VERSION_1_2) + cl_int compile( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + return detail::errHandler( + ::clCompileProgram( + object_, + 0, + NULL, + options, + 0, + NULL, + NULL, + notifyFptr, + data), + __COMPILE_PROGRAM_ERR); + } +#endif + + template + cl_int getInfo(cl_program_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetProgramInfo, object_, name, param), + __GET_PROGRAM_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template + cl_int getBuildInfo( + const Device& device, cl_program_build_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetProgramBuildInfo, object_, device(), name, param), + __GET_PROGRAM_BUILD_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getBuildInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_build_info, name>::param_type param; + cl_int result = getBuildInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int createKernels(VECTOR_CLASS* kernels) + { + cl_uint numKernels; + cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); + err = ::clCreateKernelsInProgram( + object_, numKernels, (cl_kernel*) value, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + kernels->assign(&value[0], &value[numKernels]); + return CL_SUCCESS; + } +}; + +#if defined(CL_VERSION_1_2) +inline Program linkProgram( + Program input1, + Program input2, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL, + cl_int* err = NULL) +{ + cl_int err_local = CL_SUCCESS; + + cl_program programs[2] = { input1(), input2() }; + + Context ctx = input1.getInfo(); + + cl_program prog = ::clLinkProgram( + ctx(), + 0, + NULL, + options, + 2, + programs, + notifyFptr, + data, + &err_local); + + detail::errHandler(err_local,__COMPILE_PROGRAM_ERR); + if (err != NULL) { + *err = err_local; + } + + return Program(prog); +} + +inline Program linkProgram( + VECTOR_CLASS inputPrograms, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL, + cl_int* err = NULL) +{ + cl_int err_local = CL_SUCCESS; + + cl_program * programs = (cl_program*) alloca(inputPrograms.size() * sizeof(cl_program)); + + if (programs != NULL) { + for (unsigned int i = 0; i < inputPrograms.size(); i++) { + programs[i] = inputPrograms[i](); + } + } + + cl_program prog = ::clLinkProgram( + Context::getDefault()(), + 0, + NULL, + options, + (cl_uint)inputPrograms.size(), + programs, + notifyFptr, + data, + &err_local); + + detail::errHandler(err_local,__COMPILE_PROGRAM_ERR); + if (err != NULL) { + *err = err_local; + } + + return Program(prog); +} +#endif + +template<> +inline VECTOR_CLASS cl::Program::getInfo(cl_int* err) const +{ + VECTOR_CLASS< ::size_t> sizes = getInfo(); + VECTOR_CLASS binaries; + for (VECTOR_CLASS< ::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s) + { + char *ptr = NULL; + if (*s != 0) + ptr = new char[*s]; + binaries.push_back(ptr); + } + + cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries); + if (err != NULL) { + *err = result; + } + return binaries; +} + +inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) +{ + cl_int error; + + object_ = ::clCreateKernel(program(), name, &error); + detail::errHandler(error, __CREATE_KERNEL_ERR); + + if (err != NULL) { + *err = error; + } + +} + +/*! \class CommandQueue + * \brief CommandQueue interface for cl_command_queue. + */ +class CommandQueue : public detail::Wrapper +{ +private: + static volatile int default_initialized_; + static CommandQueue default_; + static volatile cl_int default_error_; +public: + CommandQueue( + cl_command_queue_properties properties, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + } + /*! + * \brief Constructs a CommandQueue for an implementation defined device in the given context + */ + explicit CommandQueue( + const Context& context, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + VECTOR_CLASS devices; + error = context.getInfo(CL_CONTEXT_DEVICES, &devices); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) + { + if (err != NULL) { + *err = error; + } + return; + } + + object_ = ::clCreateCommandQueue(context(), devices[0](), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (err != NULL) { + *err = error; + } + + } + + CommandQueue( + const Context& context, + const Device& device, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + + static CommandQueue getDefault(cl_int * err = NULL) + { + int state = detail::compare_exchange( + &default_initialized_, + __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); + + if (state & __DEFAULT_INITIALIZED) { + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + if (state & __DEFAULT_BEING_INITIALIZED) { + // Assume writes will propagate eventually... + while(default_initialized_ != __DEFAULT_INITIALIZED) { + detail::fence(); + } + + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + + default_ = CommandQueue(context, device, 0, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + + detail::fence(); + + default_error_ = error; + // Assume writes will propagate eventually... + default_initialized_ = __DEFAULT_INITIALIZED; + + detail::fence(); + + if (err != NULL) { + *err = default_error_; + } + return default_; + + } + + CommandQueue() { } + + CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper(commandQueue) { } + + CommandQueue(const cl_command_queue& commandQueue) : detail::Wrapper(commandQueue) { } + + CommandQueue& operator = (const CommandQueue& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + CommandQueue& operator = (const cl_command_queue& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + template + cl_int getInfo(cl_command_queue_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetCommandQueueInfo, object_, name, param), + __GET_COMMAND_QUEUE_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_command_queue_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + const void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + ::size_t src_offset, + ::size_t dst_offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBuffer( + object_, src(), dst(), src_offset, dst_offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQEUE_COPY_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBufferRect( + object_, + buffer(), + blocking, + (const ::size_t *)buffer_offset, + (const ::size_t *)host_offset, + (const ::size_t *)region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBufferRect( + object_, + buffer(), + blocking, + (const ::size_t *)buffer_offset, + (const ::size_t *)host_offset, + (const ::size_t *)region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + ::size_t src_row_pitch, + ::size_t src_slice_pitch, + ::size_t dst_row_pitch, + ::size_t dst_slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferRect( + object_, + src(), + dst(), + (const ::size_t *)src_origin, + (const ::size_t *)dst_origin, + (const ::size_t *)region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQEUE_COPY_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined(CL_VERSION_1_2) + /** + * Enqueue a command to fill a buffer object with a pattern + * of a given size. The pattern is specified a as vector. + * \tparam PatternType The datatype of the pattern field. + * The pattern type must be an accepted OpenCL data type. + */ + template + cl_int enqueueFillBuffer( + const Buffer& buffer, + PatternType pattern, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillBuffer( + object_, + buffer(), + static_cast(&pattern), + sizeof(PatternType), + offset, + size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) + + cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadImage( + object_, image(), blocking, (const ::size_t *) origin, + (const ::size_t *) region, row_pitch, slice_pitch, ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteImage( + object_, image(), blocking, (const ::size_t *) origin, + (const ::size_t *) region, row_pitch, slice_pitch, ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImage( + object_, src(), dst(), (const ::size_t *) src_origin, + (const ::size_t *)dst_origin, (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined(CL_VERSION_1_2) + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA floating-point color value if + * the image channel data type is not an unnormalized signed or + * unsigned data type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_float4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA signed integer color value if + * the image channel data type is an unnormalized signed integer + * type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_int4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA unsigned integer color value if + * the image channel data type is an unnormalized unsigned integer + * type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_uint4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) + + cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& region, + ::size_t dst_offset, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImageToBuffer( + object_, src(), dst(), (const ::size_t *) src_origin, + (const ::size_t *) region, dst_offset, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + ::size_t src_offset, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferToImage( + object_, src(), dst(), src_offset, + (const ::size_t *) dst_origin, (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_int error; + void * result = ::clEnqueueMapBuffer( + object_, buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + return result; + } + + void* enqueueMapImage( + const Image& buffer, + cl_bool blocking, + cl_map_flags flags, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t * row_pitch, + ::size_t * slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_int error; + void * result = ::clEnqueueMapImage( + object_, buffer(), blocking, flags, + (const ::size_t *) origin, (const ::size_t *) region, + row_pitch, slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + return result; + } + + cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + object_, memory(), mapped_ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined(CL_VERSION_1_2) + /** + * Enqueues a marker command which waits for either a list of events to complete, + * or all previously enqueued commands to complete. + * + * Enqueues a marker command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command returns an event which can be waited on, + * i.e. this event can be waited on to insure that all events either in the event_wait_list + * or all previously enqueued commands, queued before this command to command_queue, + * have completed. + */ + cl_int enqueueMarkerWithWaitList( + const VECTOR_CLASS *events = 0, + Event *event = 0) + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueMarkerWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MARKER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * A synchronization point that enqueues a barrier operation. + * + * Enqueues a barrier command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command blocks command execution, that is, any + * following commands enqueued after it do not execute until it completes. This command + * returns an event which can be waited on, i.e. this event can be waited on to insure that + * all events either in the event_wait_list or all previously enqueued commands, queued + * before this command to command_queue, have completed. + */ + cl_int enqueueBarrierWithWaitList( + const VECTOR_CLASS *events = 0, + Event *event = 0) + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueBarrierWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_BARRIER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command to indicate with which device a set of memory objects + * should be associated. + */ + cl_int enqueueMigrateMemObjects( + const VECTOR_CLASS &memObjects, + cl_mem_migration_flags flags, + const VECTOR_CLASS* events = NULL, + Event* event = NULL + ) + { + cl_event tmp; + + cl_mem* localMemObjects = static_cast(alloca(memObjects.size() * sizeof(cl_mem))); + for( int i = 0; i < (int)memObjects.size(); ++i ) { + localMemObjects[i] = memObjects[i](); + } + + + cl_int err = detail::errHandler( + ::clEnqueueMigrateMemObjects( + object_, + (cl_uint)memObjects.size(), + static_cast(localMemObjects), + flags, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) + + cl_int enqueueNDRangeKernel( + const Kernel& kernel, + const NDRange& offset, + const NDRange& global, + const NDRange& local = NullRange, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNDRangeKernel( + object_, kernel(), (cl_uint) global.dimensions(), + offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, + (const ::size_t*) global, + local.dimensions() != 0 ? (const ::size_t*) local : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_NDRANGE_KERNEL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueTask( + const Kernel& kernel, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueTask( + object_, kernel(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_TASK_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueNativeKernel( + void (CL_CALLBACK *userFptr)(void *), + std::pair args, + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* mem_locs = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) + ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) + : NULL; + + if (mems != NULL) { + for (unsigned int i = 0; i < mem_objects->size(); i++) { + mems[i] = ((*mem_objects)[i])(); + } + } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNativeKernel( + object_, userFptr, args.first, args.second, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + mems, + (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_NATIVE_KERNEL); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueMarker(object_, (cl_event*) event), + __ENQUEUE_MARKER_ERR); + } + + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueWaitForEvents( + object_, + (cl_uint) events.size(), + (const cl_event*) &events.front()), + __ENQUEUE_WAIT_FOR_EVENTS_ERR); + } +#endif // #if defined(CL_VERSION_1_1) + + cl_int enqueueAcquireGLObjects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueAcquireGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReleaseGLObjects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReleaseGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined (USE_DX_INTEROP) +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); + + cl_int enqueueAcquireD3D10Objects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; +#if defined(CL_VERSION_1_2) + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR); +#endif +#if defined(CL_VERSION_1_1) + __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); +#endif + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueAcquireD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReleaseD3D10Objects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; +#if defined(CL_VERSION_1_2) + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR); +#endif // #if defined(CL_VERSION_1_2) +#if defined(CL_VERSION_1_1) + __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); +#endif // #if defined(CL_VERSION_1_1) + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueReleaseD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueBarrier(object_), + __ENQUEUE_BARRIER_ERR); + } +#endif // #if defined(CL_VERSION_1_1) + + cl_int flush() const + { + return detail::errHandler(::clFlush(object_), __FLUSH_ERR); + } + + cl_int finish() const + { + return detail::errHandler(::clFinish(object_), __FINISH_ERR); + } +}; + +#ifdef _WIN32 +__declspec(selectany) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__declspec(selectany) CommandQueue CommandQueue::default_; +__declspec(selectany) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; +#else +__attribute__((weak)) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__attribute__((weak)) CommandQueue CommandQueue::default_; +__attribute__((weak)) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; +#endif + +template< typename IteratorType > +Buffer::Buffer( + const Context &context, + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr, + cl_int* err) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + ::size_t size = sizeof(DataType)*(endIterator - startIterator); + + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if( !useHostPtr ) { + CommandQueue queue(context, 0, &error); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + error = cl::copy(queue, startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } +} + +inline cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + const void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + void * result = ::clEnqueueMapBuffer( + queue(), buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + return result; +} + +inline cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (error != CL_SUCCESS) { + return error; + } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + queue(), memory(), mapped_ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; +} + +inline cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + ::size_t src_offset, + ::size_t dst_offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses default command queue. + */ +template< typename IteratorType > +inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + + return cl::copy(queue, startIterator, endIterator, buffer); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses default command queue. + */ +template< typename IteratorType > +inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + + return cl::copy(queue, buffer, startIterator, endIterator); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses specified queue. + */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + ::size_t length = endIterator-startIterator; + ::size_t byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } +#if defined(_MSC_VER) + std::copy( + startIterator, + endIterator, + stdext::checked_array_iterator( + pointer, length)); +#else + std::copy(startIterator, endIterator, pointer); +#endif + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses specified queue. + */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + ::size_t length = endIterator-startIterator; + ::size_t byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } + std::copy(pointer, pointer + length, startIterator); + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + +#if defined(CL_VERSION_1_1) +inline cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + ::size_t src_row_pitch, + ::size_t src_slice_pitch, + ::size_t dst_row_pitch, + ::size_t dst_slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferRect( + src, + dst, + src_origin, + dst_origin, + region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + events, + event); +} +#endif + +inline cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImage( + src, + dst, + src_origin, + dst_origin, + region, + events, + event); +} + +inline cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& region, + ::size_t dst_offset, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImageToBuffer( + src, + dst, + src_origin, + region, + dst_offset, + events, + event); +} + +inline cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + ::size_t src_offset, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferToImage( + src, + dst, + src_offset, + dst_origin, + region, + events, + event); +} + + +inline cl_int flush(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.flush(); +} + +inline cl_int finish(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + + return queue.finish(); +} + +// Kernel Functor support +// New interface as of September 2011 +// Requires the C++11 std::tr1::function (note do not support TR1) +// Visual Studio 2010 and GCC 4.2 + +struct EnqueueArgs +{ + CommandQueue queue_; + const NDRange offset_; + const NDRange global_; + const NDRange local_; + VECTOR_CLASS events_; + + EnqueueArgs(NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + EnqueueArgs(NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + + } + + EnqueueArgs(Event e, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) + { + + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local), + events_(events) + { + + } +}; + +namespace detail { + +class NullType {}; + +template +struct SetArg +{ + static void set (Kernel kernel, T0 arg) + { + kernel.setArg(index, arg); + } +}; + +template +struct SetArg +{ + static void set (Kernel, NullType) + { + } +}; + +template < + typename T0, typename T1, typename T2, typename T3, + typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9, typename T10, typename T11, + typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, + typename T20, typename T21, typename T22, typename T23, + typename T24, typename T25, typename T26, typename T27, + typename T28, typename T29, typename T30, typename T31 +> +class KernelFunctorGlobal +{ +private: + Kernel kernel_; + +public: + KernelFunctorGlobal( + Kernel kernel) : + kernel_(kernel) + {} + + KernelFunctorGlobal( + const Program& program, + const STRING_CLASS name, + cl_int * err = NULL) : + kernel_(program, name.c_str(), err) + {} + + Event operator() ( + const EnqueueArgs& args, + T0 t0, + T1 t1 = NullType(), + T2 t2 = NullType(), + T3 t3 = NullType(), + T4 t4 = NullType(), + T5 t5 = NullType(), + T6 t6 = NullType(), + T7 t7 = NullType(), + T8 t8 = NullType(), + T9 t9 = NullType(), + T10 t10 = NullType(), + T11 t11 = NullType(), + T12 t12 = NullType(), + T13 t13 = NullType(), + T14 t14 = NullType(), + T15 t15 = NullType(), + T16 t16 = NullType(), + T17 t17 = NullType(), + T18 t18 = NullType(), + T19 t19 = NullType(), + T20 t20 = NullType(), + T21 t21 = NullType(), + T22 t22 = NullType(), + T23 t23 = NullType(), + T24 t24 = NullType(), + T25 t25 = NullType(), + T26 t26 = NullType(), + T27 t27 = NullType(), + T28 t28 = NullType(), + T29 t29 = NullType(), + T30 t30 = NullType(), + T31 t31 = NullType() + ) + { + Event event; + SetArg<0, T0>::set(kernel_, t0); + SetArg<1, T1>::set(kernel_, t1); + SetArg<2, T2>::set(kernel_, t2); + SetArg<3, T3>::set(kernel_, t3); + SetArg<4, T4>::set(kernel_, t4); + SetArg<5, T5>::set(kernel_, t5); + SetArg<6, T6>::set(kernel_, t6); + SetArg<7, T7>::set(kernel_, t7); + SetArg<8, T8>::set(kernel_, t8); + SetArg<9, T9>::set(kernel_, t9); + SetArg<10, T10>::set(kernel_, t10); + SetArg<11, T11>::set(kernel_, t11); + SetArg<12, T12>::set(kernel_, t12); + SetArg<13, T13>::set(kernel_, t13); + SetArg<14, T14>::set(kernel_, t14); + SetArg<15, T15>::set(kernel_, t15); + SetArg<16, T16>::set(kernel_, t16); + SetArg<17, T17>::set(kernel_, t17); + SetArg<18, T18>::set(kernel_, t18); + SetArg<19, T19>::set(kernel_, t19); + SetArg<20, T20>::set(kernel_, t20); + SetArg<21, T21>::set(kernel_, t21); + SetArg<22, T22>::set(kernel_, t22); + SetArg<23, T23>::set(kernel_, t23); + SetArg<24, T24>::set(kernel_, t24); + SetArg<25, T25>::set(kernel_, t25); + SetArg<26, T26>::set(kernel_, t26); + SetArg<27, T27>::set(kernel_, t27); + SetArg<28, T28>::set(kernel_, t28); + SetArg<29, T29>::set(kernel_, t29); + SetArg<30, T30>::set(kernel_, t30); + SetArg<31, T31>::set(kernel_, t31); + + args.queue_.enqueueNDRangeKernel( + kernel_, + args.offset_, + args.global_, + args.local_, + &args.events_, + &event); + + return event; + } + +}; + +//------------------------------------------------------------------------------------------------------ + + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31> +struct functionImplementation_ +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 32)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 31)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 30)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 29)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 28)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 27)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 26)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 25)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 24)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 23)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 22)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 21)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 20)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 19)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 18)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 17)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 16)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 15)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 14)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 13)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 12)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 11)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 10)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 9)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 8)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 7)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 6)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 5)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 4)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3); + } + + +}; + +template< + typename T0, + typename T1, + typename T2> +struct functionImplementation_ +< T0, + T1, + T2, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 3)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2); + } + + +}; + +template< + typename T0, + typename T1> +struct functionImplementation_ +< T0, + T1, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 2)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1) + { + return functor_( + enqueueArgs, + arg0, + arg1); + } + + +}; + +template< + typename T0> +struct functionImplementation_ +< T0, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 1)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0) + { + return functor_( + enqueueArgs, + arg0); + } + + +}; + + + + + +} // namespace detail + +//---------------------------------------------------------------------------------------------- + +template < + typename T0, typename T1 = detail::NullType, typename T2 = detail::NullType, + typename T3 = detail::NullType, typename T4 = detail::NullType, + typename T5 = detail::NullType, typename T6 = detail::NullType, + typename T7 = detail::NullType, typename T8 = detail::NullType, + typename T9 = detail::NullType, typename T10 = detail::NullType, + typename T11 = detail::NullType, typename T12 = detail::NullType, + typename T13 = detail::NullType, typename T14 = detail::NullType, + typename T15 = detail::NullType, typename T16 = detail::NullType, + typename T17 = detail::NullType, typename T18 = detail::NullType, + typename T19 = detail::NullType, typename T20 = detail::NullType, + typename T21 = detail::NullType, typename T22 = detail::NullType, + typename T23 = detail::NullType, typename T24 = detail::NullType, + typename T25 = detail::NullType, typename T26 = detail::NullType, + typename T27 = detail::NullType, typename T28 = detail::NullType, + typename T29 = detail::NullType, typename T30 = detail::NullType, + typename T31 = detail::NullType +> +struct make_kernel : + public detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + > +{ +public: + typedef detail::KernelFunctorGlobal< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + > FunctorType; + + make_kernel( + const Program& program, + const STRING_CLASS name, + cl_int * err = NULL) : + detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + >( + FunctorType(program, name, err)) + {} + + make_kernel( + const Kernel kernel) : + detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + >( + FunctorType(kernel)) + {} +}; + + +//---------------------------------------------------------------------------------------------------------------------- + +#undef __ERR_STR +#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +#undef __GET_DEVICE_INFO_ERR +#undef __GET_PLATFORM_INFO_ERR +#undef __GET_DEVICE_IDS_ERR +#undef __GET_CONTEXT_INFO_ERR +#undef __GET_EVENT_INFO_ERR +#undef __GET_EVENT_PROFILE_INFO_ERR +#undef __GET_MEM_OBJECT_INFO_ERR +#undef __GET_IMAGE_INFO_ERR +#undef __GET_SAMPLER_INFO_ERR +#undef __GET_KERNEL_INFO_ERR +#undef __GET_KERNEL_ARG_INFO_ERR +#undef __GET_KERNEL_WORK_GROUP_INFO_ERR +#undef __GET_PROGRAM_INFO_ERR +#undef __GET_PROGRAM_BUILD_INFO_ERR +#undef __GET_COMMAND_QUEUE_INFO_ERR + +#undef __CREATE_CONTEXT_ERR +#undef __CREATE_CONTEXT_FROM_TYPE_ERR +#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR + +#undef __CREATE_BUFFER_ERR +#undef __CREATE_SUBBUFFER_ERR +#undef __CREATE_IMAGE2D_ERR +#undef __CREATE_IMAGE3D_ERR +#undef __CREATE_SAMPLER_ERR +#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR + +#undef __CREATE_USER_EVENT_ERR +#undef __SET_USER_EVENT_STATUS_ERR +#undef __SET_EVENT_CALLBACK_ERR +#undef __SET_PRINTF_CALLBACK_ERR + +#undef __WAIT_FOR_EVENTS_ERR + +#undef __CREATE_KERNEL_ERR +#undef __SET_KERNEL_ARGS_ERR +#undef __CREATE_PROGRAM_WITH_SOURCE_ERR +#undef __CREATE_PROGRAM_WITH_BINARY_ERR +#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR +#undef __BUILD_PROGRAM_ERR +#undef __CREATE_KERNELS_IN_PROGRAM_ERR + +#undef __CREATE_COMMAND_QUEUE_ERR +#undef __SET_COMMAND_QUEUE_PROPERTY_ERR +#undef __ENQUEUE_READ_BUFFER_ERR +#undef __ENQUEUE_WRITE_BUFFER_ERR +#undef __ENQUEUE_READ_BUFFER_RECT_ERR +#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR +#undef __ENQEUE_COPY_BUFFER_ERR +#undef __ENQEUE_COPY_BUFFER_RECT_ERR +#undef __ENQUEUE_READ_IMAGE_ERR +#undef __ENQUEUE_WRITE_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR +#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR +#undef __ENQUEUE_MAP_BUFFER_ERR +#undef __ENQUEUE_MAP_IMAGE_ERR +#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR +#undef __ENQUEUE_NDRANGE_KERNEL_ERR +#undef __ENQUEUE_TASK_ERR +#undef __ENQUEUE_NATIVE_KERNEL + +#undef __CL_EXPLICIT_CONSTRUCTORS + +#undef __UNLOAD_COMPILER_ERR +#endif //__CL_USER_OVERRIDE_ERROR_STRINGS + +#undef __CL_FUNCTION_TYPE + +// Extensions +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_VERSION_1_1) +#undef __INIT_CL_EXT_FCN_PTR +#endif // #if defined(CL_VERSION_1_1) +#undef __CREATE_SUB_DEVICES + +#if defined(USE_CL_DEVICE_FISSION) +#undef __PARAM_NAME_DEVICE_FISSION +#endif // USE_CL_DEVICE_FISSION + +#undef __DEFAULT_NOT_INITIALIZED +#undef __DEFAULT_BEING_INITIALIZED +#undef __DEFAULT_INITIALIZED + +} // namespace cl + +#ifdef _WIN32 +#pragma pop_macro("max") +#endif // _WIN32 + +#endif // CL_HPP_ diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/ethash_cl_miner.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/ethash_cl_miner.cpp new file mode 100644 index 000000000..c0dbea21d --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/ethash_cl_miner.cpp @@ -0,0 +1,289 @@ +/* + This file is part of c-ethash. + + c-ethash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + c-ethash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file ethash_cl_miner.cpp +* @author Tim Hughes +* @date 2015 +*/ + + +#define _CRT_SECURE_NO_WARNINGS + +#include +#include +#include "ethash_cl_miner.h" +#include "ethash_cl_miner_kernel.h" +#include + +#undef min +#undef max + +#define HASH_BYTES 32 + +static void add_definition(std::string& source, char const* id, unsigned value) +{ + char buf[256]; + sprintf(buf, "#define %s %uu\n", id, value); + source.insert(source.begin(), buf, buf + strlen(buf)); +} + +ethash_cl_miner::ethash_cl_miner() +{ +} + +bool ethash_cl_miner::init(ethash_params const& params, const uint8_t seed[32], unsigned workgroup_size) +{ + // store params + m_params = params; + + // get all platforms + std::vector platforms; + cl::Platform::get(&platforms); + if (platforms.empty()) + { + debugf("No OpenCL platforms found.\n"); + return false; + } + + // use default platform + debugf("Using platform: %s\n", platforms[0].getInfo().c_str()); + + // get GPU device of the default platform + std::vector devices; + platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices); + if (devices.empty()) + { + debugf("No OpenCL devices found.\n"); + return false; + } + + // use default device + cl::Device& device = devices[0]; + debugf("Using device: %s\n", device.getInfo().c_str()); + + // create context + m_context = cl::Context({device}); + m_queue = cl::CommandQueue(m_context, device); + + // use requested workgroup size, but we require multiple of 8 + m_workgroup_size = ((workgroup_size + 7) / 8) * 8; + + // patch source code + std::string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE); + add_definition(code, "GROUP_SIZE", m_workgroup_size); + add_definition(code, "DAG_SIZE", (unsigned)(params.full_size / MIX_BYTES)); + add_definition(code, "ACCESSES", ACCESSES); + add_definition(code, "MAX_OUTPUTS", c_max_search_results); + //debugf("%s", code.c_str()); + + // create miner OpenCL program + cl::Program::Sources sources; + sources.push_back({code.c_str(), code.size()}); + + cl::Program program(m_context, sources); + try + { + program.build({device}); + } + catch (cl::Error err) + { + debugf("%s\n", program.getBuildInfo(device).c_str()); + return false; + } + m_hash_kernel = cl::Kernel(program, "ethash_hash"); + m_search_kernel = cl::Kernel(program, "ethash_search"); + + // create buffer for dag + m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, params.full_size); + + // create buffer for header + m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32); + + // compute dag on CPU + { + void* cache_mem = malloc(params.cache_size + 63); + ethash_cache cache; + cache.mem = (void*)(((uintptr_t)cache_mem + 63) & ~63); + ethash_mkcache(&cache, ¶ms, seed); + + // if this throws then it's because we probably need to subdivide the dag uploads for compatibility + void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, CL_MAP_WRITE_INVALIDATE_REGION, 0, params.full_size); + ethash_compute_full_data(dag_ptr, ¶ms, &cache); + m_queue.enqueueUnmapMemObject(m_dag, dag_ptr); + + free(cache_mem); + } + + // create mining buffers + for (unsigned i = 0; i != c_num_buffers; ++i) + { + m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY, 32*c_hash_batch_size); + m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t)); + } + return true; +} + +void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count) +{ + struct pending_batch + { + unsigned base; + unsigned count; + unsigned buf; + }; + std::queue pending; + + // update header constant buffer + m_queue.enqueueWriteBuffer(m_header, true, 0, 32, header); + + /* + __kernel void ethash_combined_hash( + __global hash32_t* g_hashes, + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong start_nonce, + uint isolate + ) + */ + m_hash_kernel.setArg(1, m_header); + m_hash_kernel.setArg(2, m_dag); + m_hash_kernel.setArg(3, nonce); + m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop + + unsigned buf = 0; + for (unsigned i = 0; i < count || !pending.empty(); ) + { + // how many this batch + if (i < count) + { + unsigned const this_count = std::min(count - i, c_hash_batch_size); + unsigned const batch_count = std::max(this_count, m_workgroup_size); + + // supply output hash buffer to kernel + m_hash_kernel.setArg(0, m_hash_buf[buf]); + + // execute it! + clock_t start_time = clock(); + m_queue.enqueueNDRangeKernel( + m_hash_kernel, + cl::NullRange, + cl::NDRange(batch_count), + cl::NDRange(m_workgroup_size) + ); + m_queue.flush(); + + pending.push({i, this_count, buf}); + i += this_count; + buf = (buf + 1) % c_num_buffers; + } + + // read results + if (i == count || pending.size() == c_num_buffers) + { + pending_batch const& batch = pending.front(); + + // could use pinned host pointer instead, but this path isn't that important. + uint8_t* hashes = (uint8_t*)m_queue.enqueueMapBuffer(m_hash_buf[batch.buf], true, CL_MAP_READ, 0, batch.count * HASH_BYTES); + memcpy(ret + batch.base*HASH_BYTES, hashes, batch.count*HASH_BYTES); + m_queue.enqueueUnmapMemObject(m_hash_buf[batch.buf], hashes); + + pending.pop(); + } + } +} + + +void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook) +{ + struct pending_batch + { + uint64_t start_nonce; + unsigned buf; + }; + std::queue pending; + + static uint32_t const c_zero = 0; + + // update header constant buffer + m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header); + for (unsigned i = 0; i != c_num_buffers; ++i) + { + m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero); + } + cl::Event pre_return_event; + m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event); + + /* + __kernel void ethash_combined_search( + __global hash32_t* g_hashes, // 0 + __constant hash32_t const* g_header, // 1 + __global hash128_t const* g_dag, // 2 + ulong start_nonce, // 3 + ulong target, // 4 + uint isolate // 5 + ) + */ + m_search_kernel.setArg(1, m_header); + m_search_kernel.setArg(2, m_dag); + + // pass these to stop the compiler unrolling the loops + m_search_kernel.setArg(4, target); + m_search_kernel.setArg(5, ~0u); + + + unsigned buf = 0; + for (uint64_t start_nonce = 0; ; start_nonce += c_search_batch_size) + { + // supply output buffer to kernel + m_search_kernel.setArg(0, m_search_buf[buf]); + m_search_kernel.setArg(3, start_nonce); + + // execute it! + m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size); + + pending.push({start_nonce, buf}); + buf = (buf + 1) % c_num_buffers; + + // read results + if (pending.size() == c_num_buffers) + { + pending_batch const& batch = pending.front(); + + // could use pinned host pointer instead + uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1+c_max_search_results) * sizeof(uint32_t)); + unsigned num_found = std::min(results[0], c_max_search_results); + + uint64_t nonces[c_max_search_results]; + for (unsigned i = 0; i != num_found; ++i) + { + nonces[i] = batch.start_nonce + results[i+1]; + } + + m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results); + + bool exit = num_found && hook.found(nonces, num_found); + exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit + if (exit) + break; + + pending.pop(); + } + } + + // not safe to return until this is ready + pre_return_event.wait(); +} + diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/ethash_cl_miner.h b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/ethash_cl_miner.h new file mode 100644 index 000000000..f37100d91 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/ethash_cl_miner.h @@ -0,0 +1,43 @@ +#pragma once + +#define __CL_ENABLE_EXCEPTIONS +#define CL_USE_DEPRECATED_OPENCL_2_0_APIS +#include "cl.hpp" +#include +#include + +class ethash_cl_miner +{ +public: + struct search_hook + { + // reports progress, return true to abort + virtual bool found(uint64_t const* nonces, uint32_t count) = 0; + virtual bool searched(uint64_t start_nonce, uint32_t count) = 0; + }; + +public: + ethash_cl_miner(); + + bool init(ethash_params const& params, const uint8_t seed[32], unsigned workgroup_size = 64); + + void hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count); + void search(uint8_t const* header, uint64_t target, search_hook& hook); + +private: + static unsigned const c_max_search_results = 63; + static unsigned const c_num_buffers = 2; + static unsigned const c_hash_batch_size = 1024; + static unsigned const c_search_batch_size = 1024*256; + + ethash_params m_params; + cl::Context m_context; + cl::CommandQueue m_queue; + cl::Kernel m_hash_kernel; + cl::Kernel m_search_kernel; + cl::Buffer m_dag; + cl::Buffer m_header; + cl::Buffer m_hash_buf[c_num_buffers]; + cl::Buffer m_search_buf[c_num_buffers]; + unsigned m_workgroup_size; +}; \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/ethash_cl_miner_kernel.cl b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/ethash_cl_miner_kernel.cl new file mode 100644 index 000000000..ee21a331e --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash-cl/ethash_cl_miner_kernel.cl @@ -0,0 +1,461 @@ +// author Tim Hughes +// Tested on Radeon HD 7850 +// Hashrate: 15940347 hashes/s +// Bandwidth: 124533 MB/s +// search kernel should fit in <= 84 VGPRS (3 wavefronts) + +#define THREADS_PER_HASH (128 / 16) +#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH) + +#define FNV_PRIME 0x01000193 + +__constant uint2 const Keccak_f1600_RC[24] = { + (uint2)(0x00000001, 0x00000000), + (uint2)(0x00008082, 0x00000000), + (uint2)(0x0000808a, 0x80000000), + (uint2)(0x80008000, 0x80000000), + (uint2)(0x0000808b, 0x00000000), + (uint2)(0x80000001, 0x00000000), + (uint2)(0x80008081, 0x80000000), + (uint2)(0x00008009, 0x80000000), + (uint2)(0x0000008a, 0x00000000), + (uint2)(0x00000088, 0x00000000), + (uint2)(0x80008009, 0x00000000), + (uint2)(0x8000000a, 0x00000000), + (uint2)(0x8000808b, 0x00000000), + (uint2)(0x0000008b, 0x80000000), + (uint2)(0x00008089, 0x80000000), + (uint2)(0x00008003, 0x80000000), + (uint2)(0x00008002, 0x80000000), + (uint2)(0x00000080, 0x80000000), + (uint2)(0x0000800a, 0x00000000), + (uint2)(0x8000000a, 0x80000000), + (uint2)(0x80008081, 0x80000000), + (uint2)(0x00008080, 0x80000000), + (uint2)(0x80000001, 0x00000000), + (uint2)(0x80008008, 0x80000000), +}; + +void keccak_f1600_round(uint2* a, uint r, uint out_size) +{ + #if !__ENDIAN_LITTLE__ + for (uint i = 0; i != 25; ++i) + a[i] = a[i].yx; + #endif + + uint2 b[25]; + uint2 t; + + // Theta + b[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]; + b[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]; + b[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]; + b[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]; + b[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]; + t = b[4] ^ (uint2)(b[1].x << 1 | b[1].y >> 31, b[1].y << 1 | b[1].x >> 31); + a[0] ^= t; + a[5] ^= t; + a[10] ^= t; + a[15] ^= t; + a[20] ^= t; + t = b[0] ^ (uint2)(b[2].x << 1 | b[2].y >> 31, b[2].y << 1 | b[2].x >> 31); + a[1] ^= t; + a[6] ^= t; + a[11] ^= t; + a[16] ^= t; + a[21] ^= t; + t = b[1] ^ (uint2)(b[3].x << 1 | b[3].y >> 31, b[3].y << 1 | b[3].x >> 31); + a[2] ^= t; + a[7] ^= t; + a[12] ^= t; + a[17] ^= t; + a[22] ^= t; + t = b[2] ^ (uint2)(b[4].x << 1 | b[4].y >> 31, b[4].y << 1 | b[4].x >> 31); + a[3] ^= t; + a[8] ^= t; + a[13] ^= t; + a[18] ^= t; + a[23] ^= t; + t = b[3] ^ (uint2)(b[0].x << 1 | b[0].y >> 31, b[0].y << 1 | b[0].x >> 31); + a[4] ^= t; + a[9] ^= t; + a[14] ^= t; + a[19] ^= t; + a[24] ^= t; + + // Rho Pi + b[0] = a[0]; + b[10] = (uint2)(a[1].x << 1 | a[1].y >> 31, a[1].y << 1 | a[1].x >> 31); + b[7] = (uint2)(a[10].x << 3 | a[10].y >> 29, a[10].y << 3 | a[10].x >> 29); + b[11] = (uint2)(a[7].x << 6 | a[7].y >> 26, a[7].y << 6 | a[7].x >> 26); + b[17] = (uint2)(a[11].x << 10 | a[11].y >> 22, a[11].y << 10 | a[11].x >> 22); + b[18] = (uint2)(a[17].x << 15 | a[17].y >> 17, a[17].y << 15 | a[17].x >> 17); + b[3] = (uint2)(a[18].x << 21 | a[18].y >> 11, a[18].y << 21 | a[18].x >> 11); + b[5] = (uint2)(a[3].x << 28 | a[3].y >> 4, a[3].y << 28 | a[3].x >> 4); + b[16] = (uint2)(a[5].y << 4 | a[5].x >> 28, a[5].x << 4 | a[5].y >> 28); + b[8] = (uint2)(a[16].y << 13 | a[16].x >> 19, a[16].x << 13 | a[16].y >> 19); + b[21] = (uint2)(a[8].y << 23 | a[8].x >> 9, a[8].x << 23 | a[8].y >> 9); + b[24] = (uint2)(a[21].x << 2 | a[21].y >> 30, a[21].y << 2 | a[21].x >> 30); + b[4] = (uint2)(a[24].x << 14 | a[24].y >> 18, a[24].y << 14 | a[24].x >> 18); + b[15] = (uint2)(a[4].x << 27 | a[4].y >> 5, a[4].y << 27 | a[4].x >> 5); + b[23] = (uint2)(a[15].y << 9 | a[15].x >> 23, a[15].x << 9 | a[15].y >> 23); + b[19] = (uint2)(a[23].y << 24 | a[23].x >> 8, a[23].x << 24 | a[23].y >> 8); + b[13] = (uint2)(a[19].x << 8 | a[19].y >> 24, a[19].y << 8 | a[19].x >> 24); + b[12] = (uint2)(a[13].x << 25 | a[13].y >> 7, a[13].y << 25 | a[13].x >> 7); + b[2] = (uint2)(a[12].y << 11 | a[12].x >> 21, a[12].x << 11 | a[12].y >> 21); + b[20] = (uint2)(a[2].y << 30 | a[2].x >> 2, a[2].x << 30 | a[2].y >> 2); + b[14] = (uint2)(a[20].x << 18 | a[20].y >> 14, a[20].y << 18 | a[20].x >> 14); + b[22] = (uint2)(a[14].y << 7 | a[14].x >> 25, a[14].x << 7 | a[14].y >> 25); + b[9] = (uint2)(a[22].y << 29 | a[22].x >> 3, a[22].x << 29 | a[22].y >> 3); + b[6] = (uint2)(a[9].x << 20 | a[9].y >> 12, a[9].y << 20 | a[9].x >> 12); + b[1] = (uint2)(a[6].y << 12 | a[6].x >> 20, a[6].x << 12 | a[6].y >> 20); + + // Chi + a[0] = bitselect(b[0] ^ b[2], b[0], b[1]); + a[1] = bitselect(b[1] ^ b[3], b[1], b[2]); + a[2] = bitselect(b[2] ^ b[4], b[2], b[3]); + a[3] = bitselect(b[3] ^ b[0], b[3], b[4]); + if (out_size >= 4) + { + a[4] = bitselect(b[4] ^ b[1], b[4], b[0]); + a[5] = bitselect(b[5] ^ b[7], b[5], b[6]); + a[6] = bitselect(b[6] ^ b[8], b[6], b[7]); + a[7] = bitselect(b[7] ^ b[9], b[7], b[8]); + a[8] = bitselect(b[8] ^ b[5], b[8], b[9]); + if (out_size >= 8) + { + a[9] = bitselect(b[9] ^ b[6], b[9], b[5]); + a[10] = bitselect(b[10] ^ b[12], b[10], b[11]); + a[11] = bitselect(b[11] ^ b[13], b[11], b[12]); + a[12] = bitselect(b[12] ^ b[14], b[12], b[13]); + a[13] = bitselect(b[13] ^ b[10], b[13], b[14]); + a[14] = bitselect(b[14] ^ b[11], b[14], b[10]); + a[15] = bitselect(b[15] ^ b[17], b[15], b[16]); + a[16] = bitselect(b[16] ^ b[18], b[16], b[17]); + a[17] = bitselect(b[17] ^ b[19], b[17], b[18]); + a[18] = bitselect(b[18] ^ b[15], b[18], b[19]); + a[19] = bitselect(b[19] ^ b[16], b[19], b[15]); + a[20] = bitselect(b[20] ^ b[22], b[20], b[21]); + a[21] = bitselect(b[21] ^ b[23], b[21], b[22]); + a[22] = bitselect(b[22] ^ b[24], b[22], b[23]); + a[23] = bitselect(b[23] ^ b[20], b[23], b[24]); + a[24] = bitselect(b[24] ^ b[21], b[24], b[20]); + } + } + + // Iota + a[0] ^= Keccak_f1600_RC[r]; + + #if !__ENDIAN_LITTLE__ + for (uint i = 0; i != 25; ++i) + a[i] = a[i].yx; + #endif +} + +void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate) +{ + for (uint i = in_size; i != 25; ++i) + { + a[i] = 0; + } +#if __ENDIAN_LITTLE__ + a[in_size] ^= 0x0000000000000001; + a[24-out_size*2] ^= 0x8000000000000000; +#else + a[in_size] ^= 0x0100000000000000; + a[24-out_size*2] ^= 0x0000000000000080; +#endif + + // Originally I unrolled the first and last rounds to interface + // better with surrounding code, however I haven't done this + // without causing the AMD compiler to blow up the VGPR usage. + uint r = 0; + do + { + // This dynamic branch stops the AMD compiler unrolling the loop + // and additionally saves about 33% of the VGPRs, enough to gain another + // wavefront. Ideally we'd get 4 in flight, but 3 is the best I can + // massage out of the compiler. It doesn't really seem to matter how + // much we try and help the compiler save VGPRs because it seems to throw + // that information away, hence the implementation of keccak here + // doesn't bother. + if (isolate) + { + keccak_f1600_round((uint2*)a, r++, 25); + } + } + while (r < 23); + + // final round optimised for digest size + keccak_f1600_round((uint2*)a, r++, out_size); +} + +#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; } + +#define countof(x) (sizeof(x) / sizeof(x[0])) + +uint fnv(uint x, uint y) +{ + return x * FNV_PRIME ^ y; +} + +uint4 fnv4(uint4 x, uint4 y) +{ + return x * FNV_PRIME ^ y; +} + +uint fnv_reduce(uint4 v) +{ + return fnv(fnv(fnv(v.x, v.y), v.z), v.w); +} + +typedef union +{ + ulong ulongs[32 / sizeof(ulong)]; + uint uints[32 / sizeof(uint)]; +} hash32_t; + +typedef union +{ + ulong ulongs[64 / sizeof(ulong)]; + uint4 uint4s[64 / sizeof(uint4)]; +} hash64_t; + +typedef union +{ + uint uints[128 / sizeof(uint)]; + uint4 uint4s[128 / sizeof(uint4)]; +} hash128_t; + +hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate) +{ + hash64_t init; + uint const init_size = countof(init.ulongs); + uint const hash_size = countof(header->ulongs); + + // sha3_512(header .. nonce) + ulong state[25]; + copy(state, header->ulongs, hash_size); + state[hash_size] = nonce; + keccak_f1600_no_absorb(state, hash_size + 1, init_size, isolate); + + copy(init.ulongs, state, init_size); + return init; +} + +uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate) +{ + uint4 mix = init; + + // share init0 + if (thread_id == 0) + *share = mix.x; + barrier(CLK_LOCAL_MEM_FENCE); + uint init0 = *share; + + uint a = 0; + do + { + bool update_share = thread_id == (a/4) % THREADS_PER_HASH; + + #pragma unroll + for (uint i = 0; i != 4; ++i) + { + if (update_share) + { + uint m[4] = { mix.x, mix.y, mix.z, mix.w }; + *share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE; + } + barrier(CLK_LOCAL_MEM_FENCE); + + mix = fnv4(mix, g_dag[*share].uint4s[thread_id]); + } + } + while ((a += 4) != (ACCESSES & isolate)); + + return fnv_reduce(mix); +} + +hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate) +{ + ulong state[25]; + + hash32_t hash; + uint const hash_size = countof(hash.ulongs); + uint const init_size = countof(init->ulongs); + uint const mix_size = countof(mix->ulongs); + + // keccak_256(keccak_512(header..nonce) .. mix); + copy(state, init->ulongs, init_size); + copy(state + init_size, mix->ulongs, mix_size); + keccak_f1600_no_absorb(state, init_size+mix_size, hash_size, isolate); + + // copy out + copy(hash.ulongs, state, hash_size); + return hash; +} + +hash32_t compute_hash_simple( + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong nonce, + uint isolate + ) +{ + hash64_t init = init_hash(g_header, nonce, isolate); + + hash128_t mix; + for (uint i = 0; i != countof(mix.uint4s); ++i) + { + mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)]; + } + + uint mix_val = mix.uints[0]; + uint init0 = mix.uints[0]; + uint a = 0; + do + { + uint pi = fnv(init0 ^ a, mix_val) % DAG_SIZE; + uint n = (a+1) % countof(mix.uints); + + #pragma unroll + for (uint i = 0; i != countof(mix.uints); ++i) + { + mix.uints[i] = fnv(mix.uints[i], g_dag[pi].uints[i]); + mix_val = i == n ? mix.uints[i] : mix_val; + } + } + while (++a != (ACCESSES & isolate)); + + // reduce to output + hash32_t fnv_mix; + for (uint i = 0; i != countof(fnv_mix.uints); ++i) + { + fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]); + } + + return final_hash(&init, &fnv_mix, isolate); +} + +typedef union +{ + struct + { + hash64_t init; + uint pad; // avoid lds bank conflicts + }; + hash32_t mix; +} compute_hash_share; + +hash32_t compute_hash( + __local compute_hash_share* share, + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong nonce, + uint isolate + ) +{ + uint const gid = get_global_id(0); + + // Compute one init hash per work item. + hash64_t init = init_hash(g_header, nonce, isolate); + + // Threads work together in this phase in groups of 8. + uint const thread_id = gid % THREADS_PER_HASH; + uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH; + + hash32_t mix; + uint i = 0; + do + { + // share init with other threads + if (i == thread_id) + share[hash_id].init = init; + barrier(CLK_LOCAL_MEM_FENCE); + + uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))]; + barrier(CLK_LOCAL_MEM_FENCE); + + uint thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uints, g_dag, isolate); + + share[hash_id].mix.uints[thread_id] = thread_mix; + barrier(CLK_LOCAL_MEM_FENCE); + + if (i == thread_id) + mix = share[hash_id].mix; + barrier(CLK_LOCAL_MEM_FENCE); + } + while (++i != (THREADS_PER_HASH & isolate)); + + return final_hash(&init, &mix, isolate); +} + +__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) +__kernel void ethash_hash_simple( + __global hash32_t* g_hashes, + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong start_nonce, + uint isolate + ) +{ + uint const gid = get_global_id(0); + g_hashes[gid] = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate); +} + +__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) +__kernel void ethash_search_simple( + __global volatile uint* restrict g_output, + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong start_nonce, + ulong target, + uint isolate + ) +{ + uint const gid = get_global_id(0); + hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate); + + if (hash.ulongs[countof(hash.ulongs)-1] < target) + { + uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1); + g_output[slot] = gid; + } +} + +__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) +__kernel void ethash_hash( + __global hash32_t* g_hashes, + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong start_nonce, + uint isolate + ) +{ + __local compute_hash_share share[HASHES_PER_LOOP]; + + uint const gid = get_global_id(0); + g_hashes[gid] = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate); +} + +__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) +__kernel void ethash_search( + __global volatile uint* restrict g_output, + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong start_nonce, + ulong target, + uint isolate + ) +{ + __local compute_hash_share share[HASHES_PER_LOOP]; + + uint const gid = get_global_id(0); + hash32_t hash = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate); + + if (hash.ulongs[countof(hash.ulongs)-1] < target) + { + uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1); + g_output[slot] = gid; + } +} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/CMakeLists.txt new file mode 100644 index 000000000..7bc147af7 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/CMakeLists.txt @@ -0,0 +1,39 @@ +set(LIBRARY ethash) + +if (CPPETHEREUM) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") +#else () +endif () + +set(CMAKE_BUILD_TYPE Release) + +if (NOT MSVC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99") +endif() + +set(FILES util.c + util.h + internal.c + ethash.h + endian.h + compiler.h + fnv.h + data_sizes.h) + +if (NOT CRYPTOPP_FOUND) + find_package(CryptoPP 5.6.2) +endif() + +if (CRYPTOPP_FOUND) + add_definitions(-DWITH_CRYPTOPP) + include_directories( ${CRYPTOPP_INCLUDE_DIRS} ) + list(APPEND FILES sha3_cryptopp.cpp sha3_cryptopp.h) +else() + list(APPEND FILES sha3.c sha3.h) +endif() + +add_library(${LIBRARY} ${FILES}) + +if (CRYPTOPP_FOUND) + TARGET_LINK_LIBRARIES(${LIBRARY} ${CRYPTOPP_LIBRARIES}) +endif() diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/compiler.h b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/compiler.h new file mode 100644 index 000000000..9695871cd --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/compiler.h @@ -0,0 +1,33 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file compiler.h + * @date 2014 + */ +#pragma once + +// Visual Studio doesn't support the inline keyword in C mode +#if defined(_MSC_VER) && !defined(__cplusplus) +#define inline __inline +#endif + +// pretend restrict is a standard keyword +#if defined(_MSC_VER) +#define restrict __restrict +#else +#define restrict __restrict__ +#endif + diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/data_sizes.h b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/data_sizes.h new file mode 100644 index 000000000..a10d4c42c --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/data_sizes.h @@ -0,0 +1,779 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software FoundationUUU,either version 3 of the LicenseUUU,or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be usefulU, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If notUUU,see . +*/ + +/** @file data_sizes.h +* @author Matthew Wampler-Doty +* @date 2015 +*/ + +// TODO: Update this after ~3.5 years + +#pragma once + +#include +#include "compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +// 2048 Epochs worth of tabulated DAG sizes + +// Generated with the following Mathematica Code: + +// GetDataSizes[n_] := Module[{ +// DataSetSizeBytesInit = 2^30, +// MixBytes = 128, +// DataSetGrowth = 2^23, +// j = 0}, +// Reap[ +// While[j < n, +// Module[{i = +// Floor[(DataSetSizeBytesInit + DataSetGrowth * j) / MixBytes]}, +// While[! PrimeQ[i], i--]; +// Sow[i*MixBytes]; j++]]]][[2]][[1]] + +static const size_t dag_sizes[] = { + 1073739904U, 1082130304U, 1090514816U, 1098906752U, 1107293056U, + 1115684224U, 1124070016U, 1132461952U, 1140849536U, 1149232768U, + 1157627776U, 1166013824U, 1174404736U, 1182786944U, 1191180416U, + 1199568512U, 1207958912U, 1216345216U, 1224732032U, 1233124736U, + 1241513344U, 1249902464U, 1258290304U, 1266673792U, 1275067264U, + 1283453312U, 1291844992U, 1300234112U, 1308619904U, 1317010048U, + 1325397376U, 1333787776U, 1342176128U, 1350561664U, 1358954368U, + 1367339392U, 1375731584U, 1384118144U, 1392507008U, 1400897408U, + 1409284736U, 1417673344U, 1426062464U, 1434451072U, 1442839168U, + 1451229056U, 1459615616U, 1468006016U, 1476394112U, 1484782976U, + 1493171584U, 1501559168U, 1509948032U, 1518337664U, 1526726528U, + 1535114624U, 1543503488U, 1551892096U, 1560278656U, 1568669056U, + 1577056384U, 1585446272U, 1593831296U, 1602219392U, 1610610304U, + 1619000192U, 1627386752U, 1635773824U, 1644164224U, 1652555648U, + 1660943488U, 1669332608U, 1677721216U, 1686109312U, 1694497664U, + 1702886272U, 1711274624U, 1719661184U, 1728047744U, 1736434816U, + 1744829056U, 1753218944U, 1761606272U, 1769995904U, 1778382464U, + 1786772864U, 1795157888U, 1803550592U, 1811937664U, 1820327552U, + 1828711552U, 1837102976U, 1845488768U, 1853879936U, 1862269312U, + 1870656896U, 1879048064U, 1887431552U, 1895825024U, 1904212096U, + 1912601216U, 1920988544U, 1929379456U, 1937765504U, 1946156672U, + 1954543232U, 1962932096U, 1971321728U, 1979707264U, 1988093056U, + 1996487552U, 2004874624U, 2013262208U, 2021653888U, 2030039936U, + 2038430848U, 2046819968U, 2055208576U, 2063596672U, 2071981952U, + 2080373632U, 2088762752U, 2097149056U, 2105539712U, 2113928576U, + 2122315136U, 2130700672U, 2139092608U, 2147483264U, 2155872128U, + 2164257664U, 2172642176U, 2181035392U, 2189426048U, 2197814912U, + 2206203008U, 2214587264U, 2222979712U, 2231367808U, 2239758208U, + 2248145024U, 2256527744U, 2264922752U, 2273312128U, 2281701248U, + 2290086272U, 2298476672U, 2306867072U, 2315251072U, 2323639168U, + 2332032128U, 2340420224U, 2348808064U, 2357196416U, 2365580416U, + 2373966976U, 2382363008U, 2390748544U, 2399139968U, 2407530368U, + 2415918976U, 2424307328U, 2432695424U, 2441084288U, 2449472384U, + 2457861248U, 2466247808U, 2474637184U, 2483026816U, 2491414144U, + 2499803776U, 2508191872U, 2516582272U, 2524970368U, 2533359232U, + 2541743488U, 2550134144U, 2558525056U, 2566913408U, 2575301504U, + 2583686528U, 2592073856U, 2600467328U, 2608856192U, 2617240448U, + 2625631616U, 2634022016U, 2642407552U, 2650796416U, 2659188352U, + 2667574912U, 2675965312U, 2684352896U, 2692738688U, 2701130624U, + 2709518464U, 2717907328U, 2726293376U, 2734685056U, 2743073152U, + 2751462016U, 2759851648U, 2768232832U, 2776625536U, 2785017728U, + 2793401984U, 2801794432U, 2810182016U, 2818571648U, 2826959488U, + 2835349376U, 2843734144U, 2852121472U, 2860514432U, 2868900992U, + 2877286784U, 2885676928U, 2894069632U, 2902451584U, 2910843008U, + 2919234688U, 2927622784U, 2936011648U, 2944400768U, 2952789376U, + 2961177728U, 2969565568U, 2977951616U, 2986338944U, 2994731392U, + 3003120256U, 3011508352U, 3019895936U, 3028287104U, 3036675968U, + 3045063808U, 3053452928U, 3061837696U, 3070228352U, 3078615424U, + 3087003776U, 3095394944U, 3103782272U, 3112173184U, 3120562048U, + 3128944768U, 3137339264U, 3145725056U, 3154109312U, 3162505088U, + 3170893184U, 3179280256U, 3187669376U, 3196056704U, 3204445568U, + 3212836736U, 3221224064U, 3229612928U, 3238002304U, 3246391168U, + 3254778496U, 3263165824U, 3271556224U, 3279944576U, 3288332416U, + 3296719232U, 3305110912U, 3313500032U, 3321887104U, 3330273152U, + 3338658944U, 3347053184U, 3355440512U, 3363827072U, 3372220288U, + 3380608384U, 3388997504U, 3397384576U, 3405774208U, 3414163072U, + 3422551936U, 3430937984U, 3439328384U, 3447714176U, 3456104576U, + 3464493952U, 3472883584U, 3481268864U, 3489655168U, 3498048896U, + 3506434432U, 3514826368U, 3523213952U, 3531603584U, 3539987072U, + 3548380288U, 3556763264U, 3565157248U, 3573545344U, 3581934464U, + 3590324096U, 3598712704U, 3607098752U, 3615488384U, 3623877248U, + 3632265856U, 3640646528U, 3649043584U, 3657430144U, 3665821568U, + 3674207872U, 3682597504U, 3690984832U, 3699367808U, 3707764352U, + 3716152448U, 3724541056U, 3732925568U, 3741318016U, 3749706368U, + 3758091136U, 3766481536U, 3774872704U, 3783260032U, 3791650432U, + 3800036224U, 3808427648U, 3816815488U, 3825204608U, 3833592704U, + 3841981568U, 3850370432U, 3858755968U, 3867147904U, 3875536256U, + 3883920512U, 3892313728U, 3900702592U, 3909087872U, 3917478784U, + 3925868416U, 3934256512U, 3942645376U, 3951032192U, 3959422336U, + 3967809152U, 3976200064U, 3984588416U, 3992974976U, 4001363584U, + 4009751168U, 4018141312U, 4026530432U, 4034911616U, 4043308928U, + 4051695488U, 4060084352U, 4068472448U, 4076862848U, 4085249408U, + 4093640576U, 4102028416U, 4110413696U, 4118805632U, 4127194496U, + 4135583104U, 4143971968U, 4152360832U, 4160746112U, 4169135744U, + 4177525888U, 4185912704U, 4194303616U, 4202691968U, 4211076736U, + 4219463552U, 4227855488U, 4236246656U, 4244633728U, 4253022848U, + 4261412224U, 4269799808U, 4278184832U, 4286578048U, 4294962304U, + 4303349632U, 4311743104U, 4320130432U, 4328521088U, 4336909184U, + 4345295488U, 4353687424U, 4362073472U, 4370458496U, 4378852736U, + 4387238528U, 4395630208U, 4404019072U, 4412407424U, 4420790656U, + 4429182848U, 4437571456U, 4445962112U, 4454344064U, 4462738048U, + 4471119232U, 4479516544U, 4487904128U, 4496289664U, 4504682368U, + 4513068416U, 4521459584U, 4529846144U, 4538232704U, 4546619776U, + 4555010176U, 4563402112U, 4571790208U, 4580174464U, 4588567936U, + 4596957056U, 4605344896U, 4613734016U, 4622119808U, 4630511488U, + 4638898816U, 4647287936U, 4655675264U, 4664065664U, 4672451968U, + 4680842624U, 4689231488U, 4697620352U, 4706007424U, 4714397056U, + 4722786176U, 4731173248U, 4739562368U, 4747951744U, 4756340608U, + 4764727936U, 4773114496U, 4781504384U, 4789894784U, 4798283648U, + 4806667648U, 4815059584U, 4823449472U, 4831835776U, 4840226176U, + 4848612224U, 4857003392U, 4865391488U, 4873780096U, 4882169728U, + 4890557312U, 4898946944U, 4907333248U, 4915722368U, 4924110976U, + 4932499328U, 4940889728U, 4949276032U, 4957666432U, 4966054784U, + 4974438016U, 4982831488U, 4991221376U, 4999607168U, 5007998848U, + 5016386432U, 5024763776U, 5033164672U, 5041544576U, 5049941888U, + 5058329728U, 5066717056U, 5075107456U, 5083494272U, 5091883904U, + 5100273536U, 5108662144U, 5117048192U, 5125436032U, 5133827456U, + 5142215296U, 5150605184U, 5158993024U, 5167382144U, 5175769472U, + 5184157568U, 5192543872U, 5200936064U, 5209324928U, 5217711232U, + 5226102656U, 5234490496U, 5242877312U, 5251263872U, 5259654016U, + 5268040832U, 5276434304U, 5284819328U, 5293209728U, 5301598592U, + 5309986688U, 5318374784U, 5326764416U, 5335151488U, 5343542144U, + 5351929472U, 5360319872U, 5368706944U, 5377096576U, 5385484928U, + 5393871232U, 5402263424U, 5410650496U, 5419040384U, 5427426944U, + 5435816576U, 5444205952U, 5452594816U, 5460981376U, 5469367936U, + 5477760896U, 5486148736U, 5494536832U, 5502925952U, 5511315328U, + 5519703424U, 5528089984U, 5536481152U, 5544869504U, 5553256064U, + 5561645696U, 5570032768U, 5578423936U, 5586811264U, 5595193216U, + 5603585408U, 5611972736U, 5620366208U, 5628750464U, 5637143936U, + 5645528192U, 5653921408U, 5662310272U, 5670694784U, 5679082624U, + 5687474048U, 5695864448U, 5704251008U, 5712641408U, 5721030272U, + 5729416832U, 5737806208U, 5746194304U, 5754583936U, 5762969984U, + 5771358592U, 5779748224U, 5788137856U, 5796527488U, 5804911232U, + 5813300608U, 5821692544U, 5830082176U, 5838468992U, 5846855552U, + 5855247488U, 5863636096U, 5872024448U, 5880411008U, 5888799872U, + 5897186432U, 5905576832U, 5913966976U, 5922352768U, 5930744704U, + 5939132288U, 5947522432U, 5955911296U, 5964299392U, 5972688256U, + 5981074304U, 5989465472U, 5997851008U, 6006241408U, 6014627968U, + 6023015552U, 6031408256U, 6039796096U, 6048185216U, 6056574848U, + 6064963456U, 6073351808U, 6081736064U, 6090128768U, 6098517632U, + 6106906496U, 6115289216U, 6123680896U, 6132070016U, 6140459648U, + 6148849024U, 6157237376U, 6165624704U, 6174009728U, 6182403712U, + 6190792064U, 6199176064U, 6207569792U, 6215952256U, 6224345216U, + 6232732544U, 6241124224U, 6249510272U, 6257899136U, 6266287744U, + 6274676864U, 6283065728U, 6291454336U, 6299843456U, 6308232064U, + 6316620928U, 6325006208U, 6333395584U, 6341784704U, 6350174848U, + 6358562176U, 6366951296U, 6375337856U, 6383729536U, 6392119168U, + 6400504192U, 6408895616U, 6417283456U, 6425673344U, 6434059136U, + 6442444672U, 6450837376U, 6459223424U, 6467613056U, 6476004224U, + 6484393088U, 6492781952U, 6501170048U, 6509555072U, 6517947008U, + 6526336384U, 6534725504U, 6543112832U, 6551500672U, 6559888768U, + 6568278656U, 6576662912U, 6585055616U, 6593443456U, 6601834112U, + 6610219648U, 6618610304U, 6626999168U, 6635385472U, 6643777408U, + 6652164224U, 6660552832U, 6668941952U, 6677330048U, 6685719424U, + 6694107776U, 6702493568U, 6710882176U, 6719274112U, 6727662976U, + 6736052096U, 6744437632U, 6752825984U, 6761213824U, 6769604224U, + 6777993856U, 6786383488U, 6794770816U, 6803158144U, 6811549312U, + 6819937664U, 6828326528U, 6836706176U, 6845101696U, 6853491328U, + 6861880448U, 6870269312U, 6878655104U, 6887046272U, 6895433344U, + 6903822208U, 6912212864U, 6920596864U, 6928988288U, 6937377152U, + 6945764992U, 6954149248U, 6962544256U, 6970928768U, 6979317376U, + 6987709312U, 6996093824U, 7004487296U, 7012875392U, 7021258624U, + 7029652352U, 7038038912U, 7046427776U, 7054818944U, 7063207808U, + 7071595136U, 7079980928U, 7088372608U, 7096759424U, 7105149824U, + 7113536896U, 7121928064U, 7130315392U, 7138699648U, 7147092352U, + 7155479168U, 7163865728U, 7172249984U, 7180648064U, 7189036672U, + 7197424768U, 7205810816U, 7214196608U, 7222589824U, 7230975104U, + 7239367552U, 7247755904U, 7256145536U, 7264533376U, 7272921472U, + 7281308032U, 7289694848U, 7298088832U, 7306471808U, 7314864512U, + 7323253888U, 7331643008U, 7340029568U, 7348419712U, 7356808832U, + 7365196672U, 7373585792U, 7381973888U, 7390362752U, 7398750592U, + 7407138944U, 7415528576U, 7423915648U, 7432302208U, 7440690304U, + 7449080192U, 7457472128U, 7465860992U, 7474249088U, 7482635648U, + 7491023744U, 7499412608U, 7507803008U, 7516192384U, 7524579968U, + 7532967296U, 7541358464U, 7549745792U, 7558134656U, 7566524032U, + 7574912896U, 7583300992U, 7591690112U, 7600075136U, 7608466816U, + 7616854912U, 7625244544U, 7633629824U, 7642020992U, 7650410368U, + 7658794112U, 7667187328U, 7675574912U, 7683961984U, 7692349568U, + 7700739712U, 7709130368U, 7717519232U, 7725905536U, 7734295424U, + 7742683264U, 7751069056U, 7759457408U, 7767849088U, 7776238208U, + 7784626816U, 7793014912U, 7801405312U, 7809792128U, 7818179968U, + 7826571136U, 7834957184U, 7843347328U, 7851732352U, 7860124544U, + 7868512384U, 7876902016U, 7885287808U, 7893679744U, 7902067072U, + 7910455936U, 7918844288U, 7927230848U, 7935622784U, 7944009344U, + 7952400256U, 7960786048U, 7969176704U, 7977565312U, 7985953408U, + 7994339968U, 8002730368U, 8011119488U, 8019508096U, 8027896192U, + 8036285056U, 8044674688U, 8053062272U, 8061448832U, 8069838464U, + 8078227328U, 8086616704U, 8095006592U, 8103393664U, 8111783552U, + 8120171392U, 8128560256U, 8136949376U, 8145336704U, 8153726848U, + 8162114944U, 8170503296U, 8178891904U, 8187280768U, 8195669632U, + 8204058496U, 8212444544U, 8220834176U, 8229222272U, 8237612672U, + 8246000768U, 8254389376U, 8262775168U, 8271167104U, 8279553664U, + 8287944064U, 8296333184U, 8304715136U, 8313108352U, 8321497984U, + 8329885568U, 8338274432U, 8346663296U, 8355052928U, 8363441536U, + 8371828352U, 8380217984U, 8388606592U, 8396996224U, 8405384576U, + 8413772672U, 8422161536U, 8430549376U, 8438939008U, 8447326592U, + 8455715456U, 8464104832U, 8472492928U, 8480882048U, 8489270656U, + 8497659776U, 8506045312U, 8514434944U, 8522823808U, 8531208832U, + 8539602304U, 8547990656U, 8556378752U, 8564768384U, 8573154176U, + 8581542784U, 8589933952U, 8598322816U, 8606705024U, 8615099264U, + 8623487872U, 8631876992U, 8640264064U, 8648653952U, 8657040256U, + 8665430656U, 8673820544U, 8682209152U, 8690592128U, 8698977152U, + 8707374464U, 8715763328U, 8724151424U, 8732540032U, 8740928384U, + 8749315712U, 8757704576U, 8766089344U, 8774480768U, 8782871936U, + 8791260032U, 8799645824U, 8808034432U, 8816426368U, 8824812928U, + 8833199488U, 8841591424U, 8849976448U, 8858366336U, 8866757248U, + 8875147136U, 8883532928U, 8891923328U, 8900306816U, 8908700288U, + 8917088384U, 8925478784U, 8933867392U, 8942250368U, 8950644608U, + 8959032704U, 8967420544U, 8975809664U, 8984197504U, 8992584064U, + 9000976256U, 9009362048U, 9017752448U, 9026141312U, 9034530688U, + 9042917504U, 9051307904U, 9059694208U, 9068084864U, 9076471424U, + 9084861824U, 9093250688U, 9101638528U, 9110027648U, 9118416512U, + 9126803584U, 9135188096U, 9143581312U, 9151969664U, 9160356224U, + 9168747136U, 9177134464U, 9185525632U, 9193910144U, 9202302848U, + 9210690688U, 9219079552U, 9227465344U, 9235854464U, 9244244864U, + 9252633472U, 9261021824U, 9269411456U, 9277799296U, 9286188928U, + 9294574208U, 9302965888U, 9311351936U, 9319740032U, 9328131968U, + 9336516736U, 9344907392U, 9353296768U, 9361685888U, 9370074752U, + 9378463616U, 9386849408U, 9395239808U, 9403629184U, 9412016512U, + 9420405376U, 9428795008U, 9437181568U, 9445570688U, 9453960832U, + 9462346624U, 9470738048U, 9479121536U, 9487515008U, 9495903616U, + 9504289664U, 9512678528U, 9521067904U, 9529456256U, 9537843584U, + 9546233728U, 9554621312U, 9563011456U, 9571398784U, 9579788672U, + 9588178304U, 9596567168U, 9604954496U, 9613343104U, 9621732992U, + 9630121856U, 9638508416U, 9646898816U, 9655283584U, 9663675776U, + 9672061312U, 9680449664U, 9688840064U, 9697230464U, 9705617536U, + 9714003584U, 9722393984U, 9730772608U, 9739172224U, 9747561088U, + 9755945344U, 9764338816U, 9772726144U, 9781116544U, 9789503872U, + 9797892992U, 9806282624U, 9814670464U, 9823056512U, 9831439232U, + 9839833984U, 9848224384U, 9856613504U, 9865000576U, 9873391232U, + 9881772416U, 9890162816U, 9898556288U, 9906940544U, 9915333248U, + 9923721088U, 9932108672U, 9940496512U, 9948888448U, 9957276544U, + 9965666176U, 9974048384U, 9982441088U, 9990830464U, 9999219584U, + 10007602816U, 10015996544U, 10024385152U, 10032774016U, 10041163648U, + 10049548928U, 10057940096U, 10066329472U, 10074717824U, 10083105152U, + 10091495296U, 10099878784U, 10108272256U, 10116660608U, 10125049216U, + 10133437312U, 10141825664U, 10150213504U, 10158601088U, 10166991232U, + 10175378816U, 10183766144U, 10192157312U, 10200545408U, 10208935552U, + 10217322112U, 10225712768U, 10234099328U, 10242489472U, 10250876032U, + 10259264896U, 10267656064U, 10276042624U, 10284429184U, 10292820352U, + 10301209472U, 10309598848U, 10317987712U, 10326375296U, 10334763392U, + 10343153536U, 10351541632U, 10359930752U, 10368318592U, 10376707456U, + 10385096576U, 10393484672U, 10401867136U, 10410262144U, 10418647424U, + 10427039104U, 10435425664U, 10443810176U, 10452203648U, 10460589952U, + 10468982144U, 10477369472U, 10485759104U, 10494147712U, 10502533504U, + 10510923392U, 10519313536U, 10527702656U, 10536091264U, 10544478592U, + 10552867712U, 10561255808U, 10569642368U, 10578032768U, 10586423168U, + 10594805632U, 10603200128U, 10611588992U, 10619976064U, 10628361344U, + 10636754048U, 10645143424U, 10653531776U, 10661920384U, 10670307968U, + 10678696832U, 10687086464U, 10695475072U, 10703863168U, 10712246144U, + 10720639616U, 10729026688U, 10737414784U, 10745806208U, 10754190976U, + 10762581376U, 10770971264U, 10779356288U, 10787747456U, 10796135552U, + 10804525184U, 10812915584U, 10821301888U, 10829692288U, 10838078336U, + 10846469248U, 10854858368U, 10863247232U, 10871631488U, 10880023424U, + 10888412032U, 10896799616U, 10905188992U, 10913574016U, 10921964672U, + 10930352768U, 10938742912U, 10947132544U, 10955518592U, 10963909504U, + 10972298368U, 10980687488U, 10989074816U, 10997462912U, 11005851776U, + 11014241152U, 11022627712U, 11031017344U, 11039403904U, 11047793024U, + 11056184704U, 11064570752U, 11072960896U, 11081343872U, 11089737856U, + 11098128256U, 11106514816U, 11114904448U, 11123293568U, 11131680128U, + 11140065152U, 11148458368U, 11156845696U, 11165236864U, 11173624192U, + 11182013824U, 11190402688U, 11198790784U, 11207179136U, 11215568768U, + 11223957376U, 11232345728U, 11240734592U, 11249122688U, 11257511296U, + 11265899648U, 11274285952U, 11282675584U, 11291065472U, 11299452544U, + 11307842432U, 11316231296U, 11324616832U, 11333009024U, 11341395584U, + 11349782656U, 11358172288U, 11366560384U, 11374950016U, 11383339648U, + 11391721856U, 11400117376U, 11408504192U, 11416893568U, 11425283456U, + 11433671552U, 11442061184U, 11450444672U, 11458837888U, 11467226752U, + 11475611776U, 11484003968U, 11492392064U, 11500780672U, 11509169024U, + 11517550976U, 11525944448U, 11534335616U, 11542724224U, 11551111808U, + 11559500672U, 11567890304U, 11576277376U, 11584667008U, 11593056128U, + 11601443456U, 11609830016U, 11618221952U, 11626607488U, 11634995072U, + 11643387776U, 11651775104U, 11660161664U, 11668552576U, 11676940928U, + 11685330304U, 11693718656U, 11702106496U, 11710496128U, 11718882688U, + 11727273088U, 11735660416U, 11744050048U, 11752437376U, 11760824704U, + 11769216128U, 11777604736U, 11785991296U, 11794381952U, 11802770048U, + 11811157888U, 11819548544U, 11827932544U, 11836324736U, 11844713344U, + 11853100928U, 11861486464U, 11869879936U, 11878268032U, 11886656896U, + 11895044992U, 11903433088U, 11911822976U, 11920210816U, 11928600448U, + 11936987264U, 11945375872U, 11953761152U, 11962151296U, 11970543488U, + 11978928512U, 11987320448U, 11995708288U, 12004095104U, 12012486272U, + 12020875136U, 12029255552U, 12037652096U, 12046039168U, 12054429568U, + 12062813824U, 12071206528U, 12079594624U, 12087983744U, 12096371072U, + 12104759936U, 12113147264U, 12121534592U, 12129924992U, 12138314624U, + 12146703232U, 12155091584U, 12163481216U, 12171864704U, 12180255872U, + 12188643968U, 12197034112U, 12205424512U, 12213811328U, 12222199424U, + 12230590336U, 12238977664U, 12247365248U, 12255755392U, 12264143488U, + 12272531584U, 12280920448U, 12289309568U, 12297694592U, 12306086528U, + 12314475392U, 12322865024U, 12331253632U, 12339640448U, 12348029312U, + 12356418944U, 12364805248U, 12373196672U, 12381580928U, 12389969024U, + 12398357632U, 12406750592U, 12415138432U, 12423527552U, 12431916416U, + 12440304512U, 12448692352U, 12457081216U, 12465467776U, 12473859968U, + 12482245504U, 12490636672U, 12499025536U, 12507411584U, 12515801728U, + 12524190592U, 12532577152U, 12540966272U, 12549354368U, 12557743232U, + 12566129536U, 12574523264U, 12582911872U, 12591299456U, 12599688064U, + 12608074624U, 12616463488U, 12624845696U, 12633239936U, 12641631616U, + 12650019968U, 12658407296U, 12666795136U, 12675183232U, 12683574656U, + 12691960192U, 12700350592U, 12708740224U, 12717128576U, 12725515904U, + 12733906816U, 12742295168U, 12750680192U, 12759071872U, 12767460736U, + 12775848832U, 12784236928U, 12792626816U, 12801014656U, 12809404288U, + 12817789312U, 12826181504U, 12834568832U, 12842954624U, 12851345792U, + 12859732352U, 12868122496U, 12876512128U, 12884901248U, 12893289088U, + 12901672832U, 12910067584U, 12918455168U, 12926842496U, 12935232896U, + 12943620736U, 12952009856U, 12960396928U, 12968786816U, 12977176192U, + 12985563776U, 12993951104U, 13002341504U, 13010730368U, 13019115392U, + 13027506304U, 13035895168U, 13044272512U, 13052673152U, 13061062528U, + 13069446272U, 13077838976U, 13086227072U, 13094613632U, 13103000192U, + 13111393664U, 13119782528U, 13128157568U, 13136559232U, 13144945024U, + 13153329536U, 13161724288U, 13170111872U, 13178502784U, 13186884736U, + 13195279744U, 13203667072U, 13212057472U, 13220445824U, 13228832128U, + 13237221248U, 13245610624U, 13254000512U, 13262388352U, 13270777472U, + 13279166336U, 13287553408U, 13295943296U, 13304331904U, 13312719488U, + 13321108096U, 13329494656U, 13337885824U, 13346274944U, 13354663808U, + 13363051136U, 13371439232U, 13379825024U, 13388210816U, 13396605056U, + 13404995456U, 13413380224U, 13421771392U, 13430159744U, 13438546048U, + 13446937216U, 13455326848U, 13463708288U, 13472103808U, 13480492672U, + 13488875648U, 13497269888U, 13505657728U, 13514045312U, 13522435712U, + 13530824576U, 13539210112U, 13547599232U, 13555989376U, 13564379008U, + 13572766336U, 13581154432U, 13589544832U, 13597932928U, 13606320512U, + 13614710656U, 13623097472U, 13631477632U, 13639874944U, 13648264064U, + 13656652928U, 13665041792U, 13673430656U, 13681818496U, 13690207616U, + 13698595712U, 13706982272U, 13715373184U, 13723762048U, 13732150144U, + 13740536704U, 13748926592U, 13757316224U, 13765700992U, 13774090112U, + 13782477952U, 13790869376U, 13799259008U, 13807647872U, 13816036736U, + 13824425344U, 13832814208U, 13841202304U, 13849591424U, 13857978752U, + 13866368896U, 13874754688U, 13883145344U, 13891533184U, 13899919232U, + 13908311168U, 13916692096U, 13925085056U, 13933473152U, 13941866368U, + 13950253696U, 13958643584U, 13967032192U, 13975417216U, 13983807616U, + 13992197504U, 14000582272U, 14008973696U, 14017363072U, 14025752192U, + 14034137984U, 14042528384U, 14050918016U, 14059301504U, 14067691648U, + 14076083584U, 14084470144U, 14092852352U, 14101249664U, 14109635968U, + 14118024832U, 14126407552U, 14134804352U, 14143188608U, 14151577984U, + 14159968384U, 14168357248U, 14176741504U, 14185127296U, 14193521024U, + 14201911424U, 14210301824U, 14218685056U, 14227067264U, 14235467392U, + 14243855488U, 14252243072U, 14260630144U, 14269021568U, 14277409408U, + 14285799296U, 14294187904U, 14302571392U, 14310961792U, 14319353728U, + 14327738752U, 14336130944U, 14344518784U, 14352906368U, 14361296512U, + 14369685376U, 14378071424U, 14386462592U, 14394848128U, 14403230848U, + 14411627392U, 14420013952U, 14428402304U, 14436793472U, 14445181568U, + 14453569664U, 14461959808U, 14470347904U, 14478737024U, 14487122816U, + 14495511424U, 14503901824U, 14512291712U, 14520677504U, 14529064832U, + 14537456768U, 14545845632U, 14554234496U, 14562618496U, 14571011456U, + 14579398784U, 14587789184U, 14596172672U, 14604564608U, 14612953984U, + 14621341312U, 14629724288U, 14638120832U, 14646503296U, 14654897536U, + 14663284864U, 14671675264U, 14680061056U, 14688447616U, 14696835968U, + 14705228416U, 14713616768U, 14722003328U, 14730392192U, 14738784128U, + 14747172736U, 14755561088U, 14763947648U, 14772336512U, 14780725376U, + 14789110144U, 14797499776U, 14805892736U, 14814276992U, 14822670208U, + 14831056256U, 14839444352U, 14847836032U, 14856222848U, 14864612992U, + 14872997504U, 14881388672U, 14889775744U, 14898165376U, 14906553472U, + 14914944896U, 14923329664U, 14931721856U, 14940109696U, 14948497024U, + 14956887424U, 14965276544U, 14973663616U, 14982053248U, 14990439808U, + 14998830976U, 15007216768U, 15015605888U, 15023995264U, 15032385152U, + 15040768384U, 15049154944U, 15057549184U, 15065939072U, 15074328448U, + 15082715008U, 15091104128U, 15099493504U, 15107879296U, 15116269184U, + 15124659584U, 15133042304U, 15141431936U, 15149824384U, 15158214272U, + 15166602368U, 15174991232U, 15183378304U, 15191760512U, 15200154496U, + 15208542592U, 15216931712U, 15225323392U, 15233708416U, 15242098048U, + 15250489216U, 15258875264U, 15267265408U, 15275654528U, 15284043136U, + 15292431488U, 15300819584U, 15309208192U, 15317596544U, 15325986176U, + 15334374784U, 15342763648U, 15351151744U, 15359540608U, 15367929728U, + 15376318336U, 15384706432U, 15393092992U, 15401481856U, 15409869952U, + 15418258816U, 15426649984U, 15435037568U, 15443425664U, 15451815296U, + 15460203392U, 15468589184U, 15476979328U, 15485369216U, 15493755776U, + 15502146944U, 15510534272U, 15518924416U, 15527311232U, 15535699072U, + 15544089472U, 15552478336U, 15560866688U, 15569254528U, 15577642624U, + 15586031488U, 15594419072U, 15602809472U, 15611199104U, 15619586432U, + 15627975296U, 15636364928U, 15644753792U, 15653141888U, 15661529216U, + 15669918848U, 15678305152U, 15686696576U, 15695083136U, 15703474048U, + 15711861632U, 15720251264U, 15728636288U, 15737027456U, 15745417088U, + 15753804928U, 15762194048U, 15770582656U, 15778971008U, 15787358336U, + 15795747712U, 15804132224U, 15812523392U, 15820909696U, 15829300096U, + 15837691264U, 15846071936U, 15854466944U, 15862855808U, 15871244672U, + 15879634816U, 15888020608U, 15896409728U, 15904799104U, 15913185152U, + 15921577088U, 15929966464U, 15938354816U, 15946743424U, 15955129472U, + 15963519872U, 15971907968U, 15980296064U, 15988684928U, 15997073024U, + 16005460864U, 16013851264U, 16022241152U, 16030629248U, 16039012736U, + 16047406976U, 16055794816U, 16064181376U, 16072571264U, 16080957824U, + 16089346688U, 16097737856U, 16106125184U, 16114514816U, 16122904192U, + 16131292544U, 16139678848U, 16148066944U, 16156453504U, 16164839552U, + 16173236096U, 16181623424U, 16190012032U, 16198401152U, 16206790528U, + 16215177344U, 16223567744U, 16231956352U, 16240344704U, 16248731008U, + 16257117824U, 16265504384U, 16273898624U, 16282281856U, 16290668672U, + 16299064192U, 16307449216U, 16315842176U, 16324230016U, 16332613504U, + 16341006464U, 16349394304U, 16357783168U, 16366172288U, 16374561664U, + 16382951296U, 16391337856U, 16399726208U, 16408116352U, 16416505472U, + 16424892032U, 16433282176U, 16441668224U, 16450058624U, 16458448768U, + 16466836864U, 16475224448U, 16483613056U, 16492001408U, 16500391808U, + 16508779648U, 16517166976U, 16525555328U, 16533944192U, 16542330752U, + 16550719616U, 16559110528U, 16567497088U, 16575888512U, 16584274816U, + 16592665472U, 16601051008U, 16609442944U, 16617832064U, 16626218624U, + 16634607488U, 16642996096U, 16651385728U, 16659773824U, 16668163712U, + 16676552576U, 16684938112U, 16693328768U, 16701718144U, 16710095488U, + 16718492288U, 16726883968U, 16735272832U, 16743661184U, 16752049792U, + 16760436608U, 16768827008U, 16777214336U, 16785599104U, 16793992832U, + 16802381696U, 16810768768U, 16819151744U, 16827542656U, 16835934848U, + 16844323712U, 16852711552U, 16861101952U, 16869489536U, 16877876864U, + 16886265728U, 16894653056U, 16903044736U, 16911431296U, 16919821696U, + 16928207488U, 16936592768U, 16944987776U, 16953375616U, 16961763968U, + 16970152832U, 16978540928U, 16986929536U, 16995319168U, 17003704448U, + 17012096896U, 17020481152U, 17028870784U, 17037262208U, 17045649536U, + 17054039936U, 17062426496U, 17070814336U, 17079205504U, 17087592064U, + 17095978112U, 17104369024U, 17112759424U, 17121147776U, 17129536384U, + 17137926016U, 17146314368U, 17154700928U, 17163089792U, 17171480192U, + 17179864192U, 17188256896U, 17196644992U, 17205033856U, 17213423488U, + 17221811072U, 17230198912U, 17238588032U, 17246976896U, 17255360384U, + 17263754624U, 17272143232U, 17280530048U, 17288918912U, 17297309312U, + 17305696384U, 17314085504U, 17322475136U, 17330863744U, 17339252096U, + 17347640192U, 17356026496U, 17364413824U, 17372796544U, 17381190016U, + 17389583488U, 17397972608U, 17406360704U, 17414748544U, 17423135872U, + 17431527296U, 17439915904U, 17448303232U, 17456691584U, 17465081728U, + 17473468288U, 17481857408U, 17490247552U, 17498635904U, 17507022464U, + 17515409024U, 17523801728U, 17532189824U, 17540577664U, 17548966016U, + 17557353344U, 17565741184U, 17574131584U, 17582519168U, 17590907008U, + 17599296128U, 17607687808U, 17616076672U, 17624455808U, 17632852352U, + 17641238656U, 17649630848U, 17658018944U, 17666403968U, 17674794112U, + 17683178368U, 17691573376U, 17699962496U, 17708350592U, 17716739968U, + 17725126528U, 17733517184U, 17741898112U, 17750293888U, 17758673024U, + 17767070336U, 17775458432U, 17783848832U, 17792236928U, 17800625536U, + 17809012352U, 17817402752U, 17825785984U, 17834178944U, 17842563968U, + 17850955648U, 17859344512U, 17867732864U, 17876119424U, 17884511872U, + 17892900224U, 17901287296U, 17909677696U, 17918058112U, 17926451072U, + 17934843776U, 17943230848U, 17951609216U, 17960008576U, 17968397696U, + 17976784256U, 17985175424U, 17993564032U, 18001952128U, 18010339712U, + 18018728576U, 18027116672U, 18035503232U, 18043894144U, 18052283264U, + 18060672128U, 18069056384U, 18077449856U, 18085837184U, 18094225792U, + 18102613376U, 18111004544U, 18119388544U, 18127781248U, 18136170368U, + 18144558976U, 18152947328U, 18161336192U, 18169724288U, 18178108544U, + 18186498944U, 18194886784U, 18203275648U, 18211666048U, 18220048768U, + 18228444544U, 18236833408U, 18245220736U +}; + + +// Generated with the following Mathematica Code: + +// GetCacheSizes[n_] := Module[{ +// DataSetSizeBytesInit = 2^30, +// MixBytes = 128, +// DataSetGrowth = 2^23, +// HashBytes = 64, +// CacheMultiplier = 1024, +// j = 0}, +// Reap[ +// While[j < n, +// Module[{i = Floor[(DataSetSizeBytesInit + DataSetGrowth * j) / (CacheMultiplier * HashBytes)]}, +// While[! PrimeQ[i], i--]; +// Sow[i*HashBytes]; j++]]]][[2]][[1]] + +const size_t cache_sizes[] = { + 1048384U, 1055552U, 1064512U, 1072832U, 1080896U, 1089344U, 1096768U, + 1104448U, 1113664U, 1121216U, 1130176U, 1138624U, 1146304U, 1155008U, + 1162816U, 1171264U, 1179328U, 1187392U, 1195456U, 1203392U, 1210816U, + 1220416U, 1227712U, 1236416U, 1244608U, 1253312U, 1261376U, 1268416U, + 1277632U, 1285696U, 1294016U, 1302208U, 1310656U, 1318336U, 1326784U, + 1334848U, 1342912U, 1350848U, 1359808U, 1366208U, 1376192U, 1383488U, + 1392448U, 1400384U, 1408832U, 1416512U, 1425344U, 1433408U, 1440704U, + 1449664U, 1458112U, 1466048U, 1474496U, 1482688U, 1490752U, 1498688U, + 1507136U, 1515328U, 1523264U, 1531456U, 1539904U, 1547584U, 1556288U, + 1564352U, 1572544U, 1580608U, 1588544U, 1596992U, 1605568U, 1612096U, + 1621952U, 1630144U, 1637696U, 1645888U, 1654336U, 1662784U, 1671104U, + 1679168U, 1686848U, 1695296U, 1702208U, 1711168U, 1720256U, 1727552U, + 1736128U, 1744576U, 1751488U, 1760576U, 1769408U, 1777472U, 1785664U, + 1793984U, 1801664U, 1810112U, 1818304U, 1826624U, 1834816U, 1842752U, + 1851328U, 1858112U, 1867456U, 1875904U, 1883968U, 1892288U, 1899712U, + 1908416U, 1916608U, 1924544U, 1932992U, 1940672U, 1948736U, 1956928U, + 1965632U, 1973824U, 1982144U, 1989824U, 1998784U, 2006848U, 2014784U, + 2022848U, 2031424U, 2038976U, 2047424U, 2055616U, 2064064U, 2072384U, + 2080448U, 2088512U, 2095936U, 2104768U, 2113472U, 2121664U, 2127808U, + 2137792U, 2146112U, 2153408U, 2162624U, 2170304U, 2178496U, 2186944U, + 2195392U, 2203456U, 2211136U, 2219968U, 2227648U, 2236096U, 2244416U, + 2250944U, 2260928U, 2268736U, 2276672U, 2283328U, 2293696U, 2301632U, + 2309312U, 2317888U, 2325952U, 2334656U, 2342848U, 2350144U, 2358848U, + 2366656U, 2375488U, 2383552U, 2391616U, 2400064U, 2407616U, 2415808U, + 2424256U, 2432704U, 2439616U, 2448704U, 2457152U, 2464064U, 2473792U, + 2482112U, 2489792U, 2497472U, 2506432U, 2514752U, 2522816U, 2531264U, + 2539456U, 2547136U, 2555456U, 2564032U, 2572096U, 2578496U, 2587712U, + 2595776U, 2604736U, 2613056U, 2620736U, 2629184U, 2637632U, 2645824U, + 2653888U, 2662208U, 2670016U, 2678464U, 2686912U, 2694464U, 2703296U, + 2710976U, 2719424U, 2727104U, 2736064U, 2743232U, 2752192U, 2760512U, + 2768704U, 2777024U, 2785088U, 2792512U, 2800576U, 2809024U, 2817856U, + 2826176U, 2833984U, 2840896U, 2850752U, 2858048U, 2867008U, 2875328U, + 2883392U, 2891584U, 2899648U, 2908096U, 2915648U, 2924224U, 2932672U, + 2940736U, 2948672U, 2956736U, 2964928U, 2973248U, 2981824U, 2988992U, + 2997184U, 3005248U, 3013952U, 3022144U, 3030592U, 3037376U, 3046976U, + 3055552U, 3063616U, 3070784U, 3079744U, 3087808U, 3096512U, 3103808U, + 3111872U, 3121088U, 3128896U, 3137216U, 3144896U, 3153856U, 3161152U, + 3169984U, 3178432U, 3186496U, 3194816U, 3203008U, 3210176U, 3218624U, + 3227072U, 3235264U, 3243712U, 3250496U, 3259456U, 3268544U, 3276736U, + 3283648U, 3292736U, 3301184U, 3308224U, 3317696U, 3324736U, 3333184U, + 3342272U, 3348544U, 3357248U, 3365312U, 3374912U, 3383104U, 3390784U, + 3399488U, 3407296U, 3414976U, 3424192U, 3432256U, 3440576U, 3448768U, + 3456832U, 3464896U, 3473216U, 3480128U, 3489344U, 3497408U, 3505856U, + 3514048U, 3521344U, 3530432U, 3538624U, 3546304U, 3555008U, 3563072U, + 3571648U, 3579712U, 3587392U, 3595456U, 3603904U, 3612352U, 3620416U, + 3628864U, 3636928U, 3645248U, 3652928U, 3660992U, 3669184U, 3677888U, + 3685952U, 3694528U, 3702592U, 3710528U, 3719104U, 3727168U, 3735488U, + 3742784U, 3751232U, 3759424U, 3765184U, 3775808U, 3783872U, 3792832U, + 3800768U, 3808832U, 3816256U, 3825344U, 3832768U, 3841856U, 3849536U, + 3857344U, 3866432U, 3874496U, 3882304U, 3890752U, 3899072U, 3907264U, + 3914816U, 3923008U, 3930688U, 3939904U, 3947968U, 3956416U, 3964736U, + 3972544U, 3981248U, 3988928U, 3997376U, 4005824U, 4012864U, 4020928U, + 4030144U, 4038592U, 4045504U, 4054592U, 4063168U, 4071104U, 4079552U, + 4087232U, 4095808U, 4103872U, 4111168U, 4120384U, 4127936U, 4136512U, + 4144832U, 4153024U, 4160704U, 4169408U, 4177216U, 4186048U, 4193344U, + 4202048U, 4210496U, 4217536U, 4227008U, 4235072U, 4243264U, 4251584U, + 4259392U, 4267712U, 4275776U, 4284352U, 4291904U, 4300096U, 4307648U, + 4316992U, 4325056U, 4333376U, 4341056U, 4349888U, 4357568U, 4366016U, + 4374464U, 4382528U, 4390208U, 4398656U, 4407232U, 4413632U, 4423616U, + 4431808U, 4439744U, 4447936U, 4455872U, 4463296U, 4472128U, 4480576U, + 4489024U, 4497344U, 4505152U, 4512448U, 4520896U, 4530112U, 4537664U, + 4546496U, 4554688U, 4562752U, 4570816U, 4579264U, 4586944U, 4595648U, + 4603712U, 4611392U, 4619072U, 4628032U, 4635584U, 4643776U, 4652864U, + 4660672U, 4669376U, 4677056U, 4684096U, 4693184U, 4702144U, 4710208U, + 4718528U, 4726336U, 4734272U, 4742464U, 4750784U, 4759232U, 4767296U, + 4775872U, 4783808U, 4791872U, 4797376U, 4808512U, 4816192U, 4825024U, + 4832704U, 4841024U, 4849472U, 4856512U, 4865984U, 4874176U, 4882112U, + 4889792U, 4898752U, 4906688U, 4913984U, 4922816U, 4931008U, 4938944U, + 4946624U, 4955584U, 4964032U, 4972096U, 4980032U, 4988864U, 4997056U, + 5004992U, 5012288U, 5020096U, 5029312U, 5037632U, 5045696U, 5052224U, + 5062592U, 5070784U, 5078848U, 5086784U, 5095232U, 5100736U, 5111488U, + 5119936U, 5127104U, 5136064U, 5143616U, 5151424U, 5160256U, 5168704U, + 5175232U, 5185472U, 5192384U, 5199296U, 5209664U, 5218112U, 5225536U, + 5233472U, 5242816U, 5250496U, 5258944U, 5267264U, 5274944U, 5283776U, + 5290048U, 5300032U, 5308096U, 5316544U, 5323328U, 5331904U, 5340736U, + 5349056U, 5356864U, 5365312U, 5372096U, 5381696U, 5390272U, 5398336U, + 5405888U, 5413696U, 5422784U, 5430976U, 5439424U, 5446976U, 5455808U, + 5463616U, 5471168U, 5480128U, 5488064U, 5494592U, 5504704U, 5513152U, + 5521216U, 5529536U, 5536576U, 5544256U, 5554112U, 5559616U, 5570368U, + 5577664U, 5586752U, 5594944U, 5603008U, 5611456U, 5619392U, 5627584U, + 5634368U, 5643328U, 5651264U, 5659328U, 5667008U, 5675584U, 5684416U, + 5692864U, 5701568U, 5709632U, 5717056U, 5725376U, 5734336U, 5740096U, + 5750336U, 5758912U, 5766848U, 5775296U, 5782976U, 5790784U, 5799616U, + 5807936U, 5815232U, 5823808U, 5832256U, 5840192U, 5848768U, 5856832U, + 5864896U, 5873344U, 5879872U, 5888576U, 5897792U, 5905216U, 5914432U, + 5920448U, 5930944U, 5938624U, 5947328U, 5955392U, 5963456U, 5971648U, + 5979328U, 5988032U, 5995712U, 6003904U, 6012736U, 6021056U, 6029248U, + 6037184U, 6045632U, 6053312U, 6061376U, 6070208U, 6077504U, 6086464U, + 6094784U, 6101696U, 6110912U, 6118592U, 6127168U, 6135616U, 6143296U, + 6150208U, 6158912U, 6168128U, 6175808U, 6182464U, 6192832U, 6201152U, + 6209344U, 6217664U, 6224576U, 6233408U, 6241472U, 6249664U, 6258496U, + 6266816U, 6275008U, 6281152U, 6291136U, 6299456U, 6306752U, 6314816U, + 6323776U, 6332096U, 6339392U, 6348224U, 6356288U, 6364096U, 6373184U, + 6381376U, 6389696U, 6397504U, 6404416U, 6413632U, 6421952U, 6430016U, + 6437824U, 6446912U, 6454592U, 6463168U, 6471616U, 6478144U, 6487232U, + 6496192U, 6504128U, 6511936U, 6520256U, 6528832U, 6536896U, 6544576U, + 6553408U, 6561472U, 6569792U, 6577216U, 6586304U, 6592448U, 6601024U, + 6610624U, 6619072U, 6627136U, 6634816U, 6643264U, 6650816U, 6659776U, + 6667712U, 6675904U, 6682688U, 6691904U, 6700864U, 6709184U, 6717376U, + 6724544U, 6733504U, 6741824U, 6749888U, 6756032U, 6766528U, 6773056U, + 6782912U, 6790976U, 6798016U, 6807488U, 6815168U, 6823744U, 6832064U, + 6840128U, 6847552U, 6855872U, 6864064U, 6872128U, 6880576U, 6889408U, + 6897472U, 6905792U, 6913472U, 6920896U, 6930368U, 6938432U, 6946624U, + 6953536U, 6963136U, 6971072U, 6979136U, 6986944U, 6995392U, 7003712U, + 7012288U, 7019072U, 7028416U, 7036352U, 7044416U, 7051712U, 7060672U, + 7069376U, 7077568U, 7085504U, 7092544U, 7102016U, 7110592U, 7118656U, + 7126208U, 7135168U, 7143104U, 7150912U, 7159744U, 7167808U, 7175744U, + 7184192U, 7191232U, 7200448U, 7207744U, 7216576U, 7224128U, 7233472U, + 7241536U, 7249856U, 7256512U, 7264832U, 7274048U, 7282112U, 7290176U, + 7298752U, 7306688U, 7315136U, 7322816U, 7331392U, 7339456U, 7347776U, + 7356224U, 7364288U, 7371712U, 7380928U, 7387456U, 7396544U, 7404352U, + 7413568U, 7421632U, 7429696U, 7436864U, 7446464U, 7454144U, 7461952U, + 7470784U, 7478336U, 7487296U, 7495616U, 7503424U, 7511872U, 7520192U, + 7527616U, 7536448U, 7544512U, 7551424U, 7560128U, 7568576U, 7577536U, + 7583552U, 7592512U, 7600448U, 7610048U, 7618496U, 7626176U, 7634752U, + 7642816U, 7651264U, 7659328U, 7667008U, 7675456U, 7683136U, 7691584U, + 7700416U, 7707584U, 7716416U, 7724224U, 7733056U, 7740608U, 7749184U, + 7756096U, 7765952U, 7774016U, 7781824U, 7790528U, 7798592U, 7805888U, + 7814336U, 7822784U, 7831232U, 7839296U, 7847104U, 7855552U, 7863616U, + 7872448U, 7880128U, 7888576U, 7896256U, 7905088U, 7912768U, 7920448U, + 7928768U, 7937344U, 7945792U, 7953728U, 7959488U, 7970752U, 7978816U, + 7987136U, 7994816U, 8003392U, 8011712U, 8019904U, 8027456U, 8035264U, + 8044352U, 8052544U, 8060224U, 8069056U, 8076736U, 8084672U, 8093504U, + 8101312U, 8110016U, 8117696U, 8125888U, 8134592U, 8142016U, 8149952U, + 8159168U, 8166976U, 8175296U, 8183488U, 8191808U, 8199616U, 8207296U, + 8216128U, 8224576U, 8232256U, 8241088U, 8248256U, 8257472U, 8264128U, + 8273728U, 8281792U, 8290112U, 8297152U, 8305216U, 8314816U, 8322752U, + 8330944U, 8339392U, 8347072U, 8355392U, 8363968U, 8371904U, 8379328U, + 8388544U, 8394944U, 8404544U, 8412736U, 8421184U, 8429504U, 8437696U, + 8445376U, 8452544U, 8460736U, 8470208U, 8478016U, 8486848U, 8494144U, + 8503232U, 8511296U, 8519488U, 8527424U, 8534464U, 8543936U, 8552384U, + 8558912U, 8568128U, 8575936U, 8584256U, 8593216U, 8601536U, 8608832U, + 8616896U, 8625728U, 8634176U, 8641856U, 8649664U, 8658112U, 8666176U, + 8674112U, 8682944U, 8691136U, 8699456U, 8707648U, 8716096U, 8724416U, + 8732608U, 8740672U, 8748352U, 8756032U, 8764864U, 8773568U, 8781376U, + 8789824U, 8796992U, 8806208U, 8814272U, 8822336U, 8830912U, 8838848U, + 8847296U, 8854336U, 8863552U, 8871488U, 8879296U, 8887616U, 8894528U, + 8904512U, 8911424U, 8920768U, 8928704U, 8936128U, 8944576U, 8953664U, + 8960576U, 8970176U, 8977984U, 8986304U, 8994112U, 9002432U, 9011008U, + 9018176U, 9026624U, 9035584U, 9043904U, 9052096U, 9059264U, 9068096U, + 9075904U, 9084224U, 9092288U, 9100352U, 9108928U, 9116992U, 9125824U, + 9133504U, 9141824U, 9150272U, 9157952U, 9164608U, 9174848U, 9182912U, + 9190976U, 9199552U, 9205312U, 9215936U, 9222592U, 9232192U, 9240512U, + 9248704U, 9256256U, 9264832U, 9272896U, 9281344U, 9288896U, 9297088U, + 9305536U, 9313984U, 9322304U, 9329728U, 9337792U, 9346112U, 9355072U, + 9363136U, 9371072U, 9378752U, 9387712U, 9395648U, 9404224U, 9411008U, + 9420608U, 9428416U, 9436864U, 9445312U, 9453376U, 9460928U, 9468736U, + 9477824U, 9485248U, 9493696U, 9502144U, 9509056U, 9518528U, 9527104U, + 9535424U, 9543616U, 9551296U, 9559744U, 9568192U, 9576256U, 9584576U, + 9591872U, 9600704U, 9608384U, 9615808U, 9624512U, 9633472U, 9641536U, + 9649856U, 9658048U, 9665728U, 9674432U, 9682496U, 9691072U, 9699136U, + 9707072U, 9715136U, 9722176U, 9732032U, 9740096U, 9747904U, 9756352U, + 9764288U, 9771584U, 9780544U, 9789376U, 9796928U, 9804224U, 9813952U, + 9822016U, 9829696U, 9838016U, 9845824U, 9852992U, 9863104U, 9870656U, + 9878464U, 9887552U, 9895744U, 9903808U, 9912128U, 9920192U, 9927616U, + 9936064U, 9944768U, 9952576U, 9960128U, 9969472U, 9977152U, 9985216U, + 9994048U, 10001216U, 10007744U, 10018496U, 10026944U, 10035136U, 10042432U, + 10051264U, 10059584U, 10067648U, 10075712U, 10083904U, 10091456U, 10100672U, + 10108864U, 10116928U, 10124864U, 10133056U, 10140736U, 10149824U, 10156736U, + 10165952U, 10173376U, 10182208U, 10190528U, 10198336U, 10206272U, 10213696U, + 10223296U, 10231744U, 10238656U, 10247488U, 10256192U, 10263872U, 10272448U, + 10280896U, 10288448U, 10296512U, 10305088U, 10313536U, 10321088U, 10330048U, + 10337984U, 10346176U, 10354112U, 10362304U, 10369088U, 10377152U, 10386752U, + 10394816U, 10403648U, 10411712U, 10418624U, 10427968U, 10436032U, 10444736U, + 10452928U, 10459712U, 10468672U, 10476608U, 10484416U, 10491328U, 10501952U, + 10509376U, 10517824U, 10526528U, 10534336U, 10542656U, 10549696U, 10559168U, + 10566592U, 10575808U, 10583488U, 10590656U, 10599488U, 10607936U, 10616768U, + 10624832U, 10630336U, 10640576U, 10649536U, 10655168U, 10665152U, 10674112U, + 10682176U, 10690496U, 10698176U, 10705216U, 10715072U, 10722752U, 10731328U, + 10739264U, 10746688U, 10754752U, 10761664U, 10770752U, 10779712U, 10787776U, + 10796608U, 10803392U, 10812352U, 10821056U, 10828736U, 10837952U, 10846144U, + 10853824U, 10861376U, 10869952U, 10877248U, 10887104U, 10895296U, 10903232U, + 10910912U, 10918976U, 10927936U, 10935872U, 10944448U, 10952384U, 10960832U, + 10968512U, 10977088U, 10985024U, 10992832U, 11000896U, 11009984U, 11018048U, + 11026112U, 11034304U, 11042624U, 11050432U, 11058368U, 11064512U, 11075392U, + 11083712U, 11091776U, 11099584U, 11107904U, 11115968U, 11124416U, 11131712U, + 11141056U, 11148608U, 11157184U, 11165248U, 11173312U, 11180992U, 11189056U, + 11197376U, 11206592U, 11214656U, 11222336U, 11230784U, 11238464U, 11246528U, + 11254976U, 11263552U, 11271872U, 11279552U, 11288512U, 11296576U, 11304256U, + 11312192U, 11320768U, 11329216U, 11336384U, 11345216U, 11352512U, 11362112U, + 11369408U, 11378624U, 11386688U, 11394496U, 11402816U, 11411264U, 11418688U, + 11427776U, 11435584U, 11444032U, 11452096U, 11459648U, 11467072U, 11476928U, + 11484992U, 11493184U, 11500352U, 11509312U, 11517248U, 11524928U, 11534144U, + 11542208U, 11550272U, 11556416U, 11566784U, 11574208U, 11581376U, 11589568U, + 11599552U, 11607104U, 11616064U, 11623616U, 11632576U, 11639872U, 11648704U, + 11657024U, 11664704U, 11672896U, 11681216U, 11689792U, 11697856U, 11705536U, + 11714368U, 11722688U, 11730496U, 11737408U, 11745728U, 11754304U, 11763008U, + 11770816U, 11779648U, 11788096U, 11795776U, 11804608U, 11812544U, 11820992U, + 11829184U, 11837248U, 11844928U, 11852096U, 11860928U, 11869888U, 11878336U, + 11886272U, 11894336U, 11902144U, 11910848U, 11919296U, 11925952U, 11934784U, + 11943616U, 11951552U, 11960128U, 11968192U, 11976512U, 11983168U, 11992768U, + 12000832U, 12008896U, 12016832U, 12025408U, 12033856U, 12042176U, 12049984U, + 12058048U, 12066112U, 12073792U, 12082624U, 12091328U, 12098752U, 12106816U, + 12115904U, 12124096U, 12131776U, 12140224U, 12148672U, 12156736U, 12164032U, + 12173248U, 12181184U, 12186176U, 12197824U, 12205888U, 12213952U, 12218944U, + 12230336U, 12238784U, 12246592U, 12254272U, 12262336U, 12269888U, 12279104U, + 12287936U, 12295744U, 12304064U, 12312512U, 12319936U, 12328768U, 12337088U, + 12344896U, 12352832U, 12361408U, 12368704U, 12377152U, 12384832U, 12394432U, + 12402496U, 12409024U, 12417728U, 12426688U, 12433216U, 12443584U, 12450752U, + 12459968U, 12468032U, 12475712U, 12484544U, 12492608U, 12500416U, 12508352U, + 12517184U, 12525376U, 12532288U, 12541888U, 12549568U, 12556864U, 12565568U, + 12574528U, 12582208U, 12590528U, 12598592U, 12607424U, 12615488U, 12623552U, + 12631744U, 12638656U, 12647744U, 12656576U, 12664768U, 12672832U, 12680896U, + 12688576U, 12697408U, 12704192U, 12713408U, 12721216U, 12729664U, 12738496U, + 12745792U, 12754496U, 12762688U, 12769472U, 12779456U, 12787648U, 12795712U, + 12804032U, 12812224U, 12819008U, 12828352U, 12836672U, 12844736U, 12851648U, + 12859456U, 12868672U, 12877504U, 12885568U, 12892864U, 12902336U, 12909376U, + 12918208U, 12926656U, 12934976U, 12942784U, 12951104U, 12959552U, 12967744U, + 12976064U, 12984256U, 12991936U, 12999488U, 13007936U, 13016768U, 13021504U, + 13033024U, 13041472U, 13049408U, 13057472U, 13065664U, 13072064U, 13081408U, + 13089344U, 13098688U, 13107008U, 13115072U, 13122752U, 13130944U, 13139648U, + 13147712U, 13155776U, 13162432U, 13172672U, 13180864U, 13188928U, 13196992U, + 13203392U, 13213504U, 13219264U, 13228736U, 13236928U, 13244992U, 13253056U, + 13262528U, 13269952U, 13278784U, 13285952U, 13295552U, 13303616U, 13311808U, + 13319744U, 13328192U, 13336256U, 13344704U, 13352384U, 13360576U, 13369024U, + 13377344U, 13385408U, 13393216U, 13401664U, 13410112U, 13418176U, 13426496U, + 13434688U, 13442368U, 13451072U, 13459136U, 13466944U, 13475648U, 13482944U, + 13491904U, 13500352U, 13508288U, 13516736U, 13524416U, 13532224U, 13541312U, + 13549504U, 13556288U, 13564736U, 13573184U, 13581376U, 13587008U, 13598656U, + 13605952U, 13612864U, 13622464U, 13631168U, 13639616U, 13647808U, 13655104U, + 13663424U, 13671872U, 13680064U, 13688768U, 13696576U, 13705024U, 13712576U, + 13721536U, 13729216U, 13737664U, 13746112U, 13753024U, 13759552U, 13770304U, + 13777856U, 13786688U, 13793984U, 13802176U, 13811264U, 13819328U, 13827904U, + 13835456U, 13844416U, 13851584U, 13860544U, 13868992U, 13877056U, 13884608U, + 13893184U, 13901248U, 13909696U, 13917632U, 13925056U, 13934528U, 13942336U, + 13950784U, 13959104U, 13966912U, 13975232U, 13982656U, 13991872U, 13999936U, + 14007872U, 14016064U, 14024512U, 14032064U, 14040896U, 14049088U, 14057408U, + 14065088U, 14072896U, 14081344U, 14089664U, 14097856U, 14106304U, 14114752U, + 14122688U, 14130752U, 14138816U, 14147008U, 14155072U, 14163904U, 14170432U, + 14180288U, 14187328U, 14196032U, 14204864U, 14212672U, 14220736U, 14229056U, + 14237504U, 14245568U, 14253632U, 14261824U, 14269888U, 14278592U, 14286656U, + 14293696U, 14302784U, 14309696U, 14317504U, 14326336U, 14335936U, 14343232U, + 14352064U, 14359232U, 14368064U, 14376512U, 14384576U, 14393024U, 14401472U, + 14409536U, 14416832U, 14424512U, 14433856U, 14440768U, 14449984U, 14458816U, + 14465728U, 14474816U, 14482112U, 14491328U, 14499392U, 14506816U, 14516032U, + 14524352U, 14531392U, 14540224U, 14547392U, 14556992U, 14565184U, 14573248U, + 14580928U, 14588864U, 14596928U, 14606272U, 14613824U, 14622656U, 14630464U, + 14638912U, 14646976U, 14655296U, 14661952U, 14671808U, 14679872U, 14687936U, + 14696384U, 14704576U, 14710336U, 14720192U, 14729152U, 14736448U, 14745152U, + 14752448U, 14761792U, 14769856U, 14777024U, 14785984U, 14792384U, 14802752U, + 14810816U, 14819264U, 14827328U, 14835136U, 14843072U, 14851264U, 14860096U, + 14867648U, 14876096U, 14884544U, 14892736U, 14900672U, 14907968U, 14917312U, + 14924864U, 14933824U, 14939968U, 14950336U, 14957632U, 14966464U, 14974912U, + 14982592U, 14991296U, 14999104U, 15006272U, 15015232U, 15023936U, 15031616U, + 15040448U, 15047488U, 15055552U, 15063616U, 15073216U, 15079744U, 15088064U, + 15097664U, 15105344U, 15113792U, 15122368U, 15130048U, 15137728U, 15146176U, + 15154112U, 15162688U, 15171392U, 15179456U, 15187264U, 15194176U, 15204032U, + 15212224U, 15220544U, 15227456U, 15237056U, 15245248U, 15253184U, 15261632U, + 15269824U, 15277376U, 15285824U, 15293888U, 15301568U, 15310784U, 15318848U, + 15325504U, 15335104U, 15343168U, 15350848U, 15359936U, 15367232U, 15373376U, + 15384256U, 15392576U, 15400384U, 15408832U, 15417152U, 15424832U, 15433024U, + 15441344U, 15449152U, 15457088U, 15466432U, 15474112U, 15482816U, 15488576U, + 15499072U, 15505856U, 15514816U, 15523264U, 15531584U, 15540032U, 15547328U, + 15553984U, 15564608U, 15571904U, 15579968U, 15589312U, 15597376U, 15605696U, + 15612992U, 15621824U, 15630016U, 15638464U, 15646144U, 15654592U, 15662912U, + 15671104U, 15677248U, 15686848U, 15693376U, 15701696U, 15712064U, 15720256U, + 15728576U, 15736384U, 15744704U, 15752512U, 15761344U, 15769024U, 15777728U, + 15785152U, 15793984U, 15802048U, 15809984U, 15817024U, 15825856U, 15834944U, + 15843008U, 15849664U, 15859136U, 15866432U, 15876032U, 15884096U, 15892288U, + 15900608U, 15908416U, 15916864U, 15924928U, 15930176U, 15941056U, 15949504U, + 15957824U, 15965632U, 15973952U, 15982528U, 15990592U, 15998272U, 16006976U, + 16012736U, 16023104U, 16031296U, 16039616U, 16048064U, 16055744U, 16064192U, + 16071488U, 16080832U, 16088768U, 16097216U, 16104896U, 16112704U, 16121792U, + 16129856U, 16138048U, 16146112U, 16154176U, 16162624U, 16170688U, 16177856U, + 16186816U, 16195136U, 16202176U, 16211648U, 16220096U, 16228288U, 16235584U, + 16244672U, 16252864U, 16260544U, 16269248U, 16277056U, 16285504U, 16291648U, + 16301632U, 16309312U, 16318144U, 16326208U, 16333888U, 16342336U, 16351168U, + 16359232U, 16367552U, 16375616U, 16383296U, 16391744U, 16398016U, 16407616U, + 16415936U, 16424896U, 16432448U, 16440896U, 16449088U, 16457024U, 16465472U, + 16474048U, 16481216U, 16490048U, 16498624U, 16505792U, 16513984U, 16523072U, + 16531136U, 16538944U, 16547264U, 16555328U, 16563776U, 16570816U, 16578112U, + 16587712U, 16596544U, 16604992U, 16613312U, 16620608U, 16629568U, 16637888U, + 16645696U, 16653632U, 16661696U, 16669888U, 16677568U, 16686272U, 16695232U, + 16703168U, 16710464U, 16719424U, 16726592U, 16733888U, 16744384U, 16752448U, + 16760768U, 16768448U, 16776896U, 16785344U, 16793536U, 16801216U, 16809664U, + 16818112U, 16826176U, 16833472U, 16842688U, 16850752U, 16859072U, 16866368U, + 16875328U, 16883392U, 16891712U, 16899776U, 16907456U, 16915264U, 16924352U, + 16931776U, 16940608U, 16949056U, 16957376U, 16965056U, 16973248U, 16981696U, + 16990144U, 16997056U, 17005888U, 17014208U, 17021504U, 17031104U, 17039296U, + 17046976U, 17055424U, 17062592U, 17070016U, 17079488U, 17087936U, 17096512U, + 17104576U, 17113024U, 17121088U, 17129408U, 17136832U, 17145664U, 17152832U, + 17161792U, 17170112U, 17177792U, 17186368U, 17194304U, 17202496U, 17211328U, + 17218624U, 17227712U, 17233984U, 17243584U, 17251904U, 17259712U, 17266624U, + 17276608U, 17284672U, 17292224U, 17301056U, 17309632U, 17317568U, 17326016U, + 17333824U, 17342272U, 17350208U, 17358784U, 17366848U, 17374912U, 17382592U, + 17390656U, 17399488U, 17406784U, 17413952U, 17423936U, 17432512U, 17440448U, + 17447744U, 17456704U, 17464768U, 17472064U, 17481536U, 17489344U, 17495488U, + 17505728U, 17513792U, 17522368U, 17530816U, 17538112U, 17546944U, 17555264U, + 17563072U, 17569856U, 17579456U, 17587904U, 17596352U, 17603776U, 17611712U, + 17620672U, 17628992U, 17637184U, 17645504U, 17653568U, 17661632U, 17669824U, + 17677376U, 17686208U, 17693888U, 17702336U, 17710144U, 17718208U, 17726528U, + 17734336U, 17743808U, 17751872U, 17759936U, 17766592U, 17776448U, 17784512U, + 17791936U, 17801152U, 17809216U, 17817152U +}; + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/endian.h b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/endian.h new file mode 100644 index 000000000..9ca842e47 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/endian.h @@ -0,0 +1,74 @@ +#pragma once + +#include +#include "compiler.h" + +static const uint8_t BitReverseTable256[] = + { + 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, + 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, + 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, + 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, + 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, + 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, + 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, + 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, + 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, + 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, + 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, + 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, + 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, + 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, + 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, + 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF + }; + +static inline uint32_t bitfn_swap32(uint32_t a) { + return (BitReverseTable256[a & 0xff] << 24) | + (BitReverseTable256[(a >> 8) & 0xff] << 16) | + (BitReverseTable256[(a >> 16) & 0xff] << 8) | + (BitReverseTable256[(a >> 24) & 0xff]); +} + +static inline uint64_t bitfn_swap64(uint64_t a) { + return ((uint64_t) bitfn_swap32((uint32_t) (a >> 32))) | + (((uint64_t) bitfn_swap32((uint32_t) a)) << 32); +} + +#if defined(__MINGW32__) || defined(_WIN32) + # define LITTLE_ENDIAN 1234 + # define BYTE_ORDER LITTLE_ENDIAN +#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) + # include +#elif defined(__OpenBSD__) || defined(__SVR4) + # include +#elif defined(__APPLE__) +# include +#elif defined( BSD ) && (BSD >= 199103) + # include +#elif defined( __QNXNTO__ ) && defined( __LITTLEENDIAN__ ) + # define LITTLE_ENDIAN 1234 + # define BYTE_ORDER LITTLE_ENDIAN +#elif defined( __QNXNTO__ ) && defined( __BIGENDIAN__ ) + # define BIG_ENDIAN 1234 + # define BYTE_ORDER BIG_ENDIAN +#else + +# include + +#endif + + +#if LITTLE_ENDIAN == BYTE_ORDER + +#define fix_endian32(x) (x) +#define fix_endian64(x) (x) + +#elif BIG_ENDIAN == BYTE_ORDER + +#define fix_endian32(x) bitfn_swap32(x) +#define fix_endian64(x) bitfn_swap64(x) + +#else +# error "endian not supported" +#endif // BYTE_ORDER \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/ethash.h b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/ethash.h new file mode 100644 index 000000000..82421b868 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/ethash.h @@ -0,0 +1,95 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ + +/** @file ethash.h +* @date 2015 +*/ +#pragma once + +#include +#include +#include +#include +#include "compiler.h" + +#define REVISION 20 +#define DAGSIZE_BYTES_INIT 1073741824U // 2**30 +#define DAG_GROWTH 8388608U // 2**23 +#define CACHE_MULTIPLIER 1024 +#define EPOCH_LENGTH 30000U +#define MIX_BYTES 128 +#define DAG_PARENTS 256 +#define CACHE_ROUNDS 3 +#define ACCESSES 64 + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct ethash_params { + size_t full_size; // Size of full data set (in bytes, multiple of mix size (128)). + size_t cache_size; // Size of compute cache (in bytes, multiple of node size (64)). +} ethash_params; + +typedef struct ethash_return_value { + uint8_t result[32]; + uint8_t mix_hash[32]; +} ethash_return_value; + +size_t ethash_get_datasize(const uint32_t block_number); +size_t ethash_get_cachesize(const uint32_t block_number); + +// initialize the parameters +static inline void ethash_params_init(ethash_params *params, const uint32_t block_number) { + params->full_size = ethash_get_datasize(block_number); + params->cache_size = ethash_get_cachesize(block_number); +} + +typedef struct ethash_cache { + void *mem; +} ethash_cache; + +void ethash_mkcache(ethash_cache *cache, ethash_params const *params, const uint8_t seed[32]); +void ethash_compute_full_data(void *mem, ethash_params const *params, ethash_cache const *cache); +void ethash_full(ethash_return_value *ret, void const *full_mem, ethash_params const *params, const uint8_t header_hash[32], const uint64_t nonce); +void ethash_light(ethash_return_value *ret, ethash_cache const *cache, ethash_params const *params, const uint8_t header_hash[32], const uint64_t nonce); + +static inline void ethash_prep_light(void *cache, ethash_params const *params, const uint8_t seed[32]) { ethash_cache c; c.mem = cache; ethash_mkcache(&c, params, seed); } +static inline void ethash_compute_light(ethash_return_value *ret, void const *cache, ethash_params const *params, const uint8_t header_hash[32], const uint64_t nonce) { ethash_cache c; c.mem = (void*)cache; ethash_light(ret, &c, params, header_hash, nonce); } +static inline void ethash_prep_full(void *full, ethash_params const *params, void const *cache) { ethash_cache c; c.mem = (void*)cache; ethash_compute_full_data(full, params, &c); } +static inline void ethash_compute_full(ethash_return_value *ret, void const *full, ethash_params const *params, const uint8_t header_hash[32], const uint64_t nonce) { ethash_full(ret, full, params, header_hash, nonce); } + +static inline int ethash_check_difficulty( + const uint8_t hash[32], + const uint8_t difficulty[32]) { + // Difficulty is big endian + for (int i = 0; i < 32; i++) { + if (hash[i] == difficulty[i]) continue; + return hash[i] < difficulty[i]; + } + return 0; +} + +int ethash_quick_check_difficulty( + const uint8_t header_hash[32], + const uint64_t nonce, + const uint8_t mix_hash[32], + const uint8_t difficulty[32]); + +#ifdef __cplusplus +} +#endif diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/fnv.h b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/fnv.h new file mode 100644 index 000000000..edabeaae2 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/fnv.h @@ -0,0 +1,38 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file fnv.h +* @author Matthew Wampler-Doty +* @date 2015 +*/ + +#pragma once +#include +#include "compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define FNV_PRIME 0x01000193 + +static inline uint32_t fnv_hash(const uint32_t x, const uint32_t y) { + return x*FNV_PRIME ^ y; +} + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/internal.c b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/internal.c new file mode 100644 index 000000000..a2b82d375 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/internal.c @@ -0,0 +1,298 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file dash.cpp +* @author Tim Hughes +* @author Matthew Wampler-Doty +* @date 2015 +*/ + +#include +#include +#include +#include "ethash.h" +#include "fnv.h" +#include "endian.h" +#include "internal.h" +#include "data_sizes.h" + +#ifdef WITH_CRYPTOPP + +#include "sha3_cryptopp.h" + +#else +#include "sha3.h" +#endif // WITH_CRYPTOPP + +size_t ethash_get_datasize(const uint32_t block_number) { + assert(block_number / EPOCH_LENGTH < 500); + return dag_sizes[block_number / EPOCH_LENGTH]; +} + +size_t ethash_get_cachesize(const uint32_t block_number) { + assert(block_number / EPOCH_LENGTH < 500); + return cache_sizes[block_number / EPOCH_LENGTH]; +} + +// Follows Sergio's "STRICT MEMORY HARD HASHING FUNCTIONS" (2014) +// https://bitslog.files.wordpress.com/2013/12/memohash-v0-3.pdf +// SeqMemoHash(s, R, N) +void static ethash_compute_cache_nodes( + node *const nodes, + ethash_params const *params, + const uint8_t seed[32]) { + assert((params->cache_size % sizeof(node)) == 0); + uint32_t const num_nodes = (uint32_t)(params->cache_size / sizeof(node)); + + SHA3_512(nodes[0].bytes, seed, 32); + + for (unsigned i = 1; i != num_nodes; ++i) { + SHA3_512(nodes[i].bytes, nodes[i - 1].bytes, 64); + } + + for (unsigned j = 0; j != CACHE_ROUNDS; j++) { + for (unsigned i = 0; i != num_nodes; i++) { + uint32_t const idx = nodes[i].words[0] % num_nodes; + node data; + data = nodes[(num_nodes - 1 + i) % num_nodes]; + for (unsigned w = 0; w != NODE_WORDS; ++w) + { + data.words[w] ^= nodes[idx].words[w]; + } + SHA3_512(nodes[i].bytes, data.bytes, sizeof(data)); + } + } + + // now perform endian conversion +#if BYTE_ORDER != LITTLE_ENDIAN + for (unsigned w = 0; w != (num_nodes*NODE_WORDS); ++w) + { + nodes->words[w] = fix_endian32(nodes->words[w]); + } +#endif +} + +void ethash_mkcache( + ethash_cache *cache, + ethash_params const *params, + const uint8_t seed[32]) { + node *nodes = (node *) cache->mem; + ethash_compute_cache_nodes(nodes, params, seed); +} + +void ethash_calculate_dag_item( + node *const ret, + const unsigned node_index, + const struct ethash_params *params, + const struct ethash_cache *cache) { + + uint32_t num_parent_nodes = (uint32_t)(params->cache_size / sizeof(node)); + node const *cache_nodes = (node const *) cache->mem; + node const *init = &cache_nodes[node_index % num_parent_nodes]; + + memcpy(ret, init, sizeof(node)); + ret->words[0] ^= node_index; + SHA3_512(ret->bytes, ret->bytes, sizeof(node)); + +#if defined(_M_X64) && ENABLE_SSE + __m128i const fnv_prime = _mm_set1_epi32(FNV_PRIME); + __m128i xmm0 = ret->xmm[0]; + __m128i xmm1 = ret->xmm[1]; + __m128i xmm2 = ret->xmm[2]; + __m128i xmm3 = ret->xmm[3]; +#endif + + for (unsigned i = 0; i != DAG_PARENTS; ++i) + { + uint32_t parent_index = ((node_index ^ i)*FNV_PRIME ^ ret->words[i % NODE_WORDS]) % num_parent_nodes; + node const *parent = &cache_nodes[parent_index]; + + #if defined(_M_X64) && ENABLE_SSE + { + xmm0 = _mm_mullo_epi32(xmm0, fnv_prime); + xmm1 = _mm_mullo_epi32(xmm1, fnv_prime); + xmm2 = _mm_mullo_epi32(xmm2, fnv_prime); + xmm3 = _mm_mullo_epi32(xmm3, fnv_prime); + xmm0 = _mm_xor_si128(xmm0, parent->xmm[0]); + xmm1 = _mm_xor_si128(xmm1, parent->xmm[1]); + xmm2 = _mm_xor_si128(xmm2, parent->xmm[2]); + xmm3 = _mm_xor_si128(xmm3, parent->xmm[3]); + + // have to write to ret as values are used to compute index + ret->xmm[0] = xmm0; + ret->xmm[1] = xmm1; + ret->xmm[2] = xmm2; + ret->xmm[3] = xmm3; + } + #else + { + for (unsigned w = 0; w != NODE_WORDS; ++w) { + ret->words[w] = fnv_hash(ret->words[w], parent->words[w]); + } + } + #endif + } + + SHA3_512(ret->bytes, ret->bytes, sizeof(node)); +} + +void ethash_compute_full_data( + void *mem, + ethash_params const *params, + ethash_cache const *cache) { + assert((params->full_size % (sizeof(uint32_t) * MIX_WORDS)) == 0); + assert((params->full_size % sizeof(node)) == 0); + node *full_nodes = mem; + + // now compute full nodes + for (unsigned n = 0; n != (params->full_size / sizeof(node)); ++n) { + ethash_calculate_dag_item(&(full_nodes[n]), n, params, cache); + } +} + +static void ethash_hash( + ethash_return_value * ret, + node const *full_nodes, + ethash_cache const *cache, + ethash_params const *params, + const uint8_t header_hash[32], + const uint64_t nonce) { + + assert((params->full_size % MIX_WORDS) == 0); + + // pack hash and nonce together into first 40 bytes of s_mix + assert(sizeof(node)*8 == 512); + node s_mix[MIX_NODES + 1]; + memcpy(s_mix[0].bytes, header_hash, 32); + +#if BYTE_ORDER != LITTLE_ENDIAN + s_mix[0].double_words[4] = fix_endian64(nonce); +#else + s_mix[0].double_words[4] = nonce; +#endif + + // compute sha3-512 hash and replicate across mix + SHA3_512(s_mix->bytes, s_mix->bytes, 40); + +#if BYTE_ORDER != LITTLE_ENDIAN + for (unsigned w = 0; w != 16; ++w) { + s_mix[0].words[w] = fix_endian32(s_mix[0].words[w]); + } +#endif + + node* const mix = s_mix + 1; + for (unsigned w = 0; w != MIX_WORDS; ++w) { + mix->words[w] = s_mix[0].words[w % NODE_WORDS]; + } + + unsigned const + page_size = sizeof(uint32_t) * MIX_WORDS, + num_full_pages = (unsigned)(params->full_size / page_size); + + + for (unsigned i = 0; i != ACCESSES; ++i) + { + uint32_t const index = ((s_mix->words[0] ^ i)*FNV_PRIME ^ mix->words[i % MIX_WORDS]) % num_full_pages; + + for (unsigned n = 0; n != MIX_NODES; ++n) + { + const node * dag_node = &full_nodes[MIX_NODES * index + n]; + + if (!full_nodes) { + node tmp_node; + ethash_calculate_dag_item(&tmp_node, index * MIX_NODES + n, params, cache); + dag_node = &tmp_node; + } + + #if defined(_M_X64) && ENABLE_SSE + { + __m128i fnv_prime = _mm_set1_epi32(FNV_PRIME); + __m128i xmm0 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[0]); + __m128i xmm1 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[1]); + __m128i xmm2 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[2]); + __m128i xmm3 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[3]); + mix[n].xmm[0] = _mm_xor_si128(xmm0, dag_node->xmm[0]); + mix[n].xmm[1] = _mm_xor_si128(xmm1, dag_node->xmm[1]); + mix[n].xmm[2] = _mm_xor_si128(xmm2, dag_node->xmm[2]); + mix[n].xmm[3] = _mm_xor_si128(xmm3, dag_node->xmm[3]); + } + #else + { + for (unsigned w = 0; w != NODE_WORDS; ++w) { + mix[n].words[w] = fnv_hash(mix[n].words[w], dag_node->words[w]); + } + } + #endif + } + + } + + // compress mix + for (unsigned w = 0; w != MIX_WORDS; w += 4) + { + uint32_t reduction = mix->words[w+0]; + reduction = reduction*FNV_PRIME ^ mix->words[w+1]; + reduction = reduction*FNV_PRIME ^ mix->words[w+2]; + reduction = reduction*FNV_PRIME ^ mix->words[w+3]; + mix->words[w/4] = reduction; + } + +#if BYTE_ORDER != LITTLE_ENDIAN + for (unsigned w = 0; w != MIX_WORDS/4; ++w) { + mix->words[w] = fix_endian32(mix->words[w]); + } +#endif + + memcpy(ret->mix_hash, mix->bytes, 32); + // final Keccak hash + SHA3_256(ret->result, s_mix->bytes, 64+32); // Keccak-256(s + compressed_mix) +} + +void ethash_quick_hash( + uint8_t return_hash[32], + const uint8_t header_hash[32], + const uint64_t nonce, + const uint8_t mix_hash[32]) { + + uint8_t buf[64+32]; + memcpy(buf, header_hash, 32); +#if BYTE_ORDER != LITTLE_ENDIAN + nonce = fix_endian64(nonce); +#endif + memcpy(&(buf[32]), &nonce, 8); + SHA3_512(buf, buf, 40); + memcpy(&(buf[64]), mix_hash, 32); + SHA3_256(return_hash, buf, 64+32); +} + +int ethash_quick_check_difficulty( + const uint8_t header_hash[32], + const uint64_t nonce, + const uint8_t mix_hash[32], + const uint8_t difficulty[32]) { + + uint8_t return_hash[32]; + ethash_quick_hash(return_hash, header_hash, nonce, mix_hash); + return ethash_check_difficulty(return_hash, difficulty); +} + +void ethash_full(ethash_return_value * ret, void const *full_mem, ethash_params const *params, const uint8_t previous_hash[32], const uint64_t nonce) { + ethash_hash(ret, (node const *) full_mem, NULL, params, previous_hash, nonce); +} + +void ethash_light(ethash_return_value * ret, ethash_cache const *cache, ethash_params const *params, const uint8_t previous_hash[32], const uint64_t nonce) { + ethash_hash(ret, NULL, cache, params, previous_hash, nonce); +} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/internal.h b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/internal.h new file mode 100644 index 000000000..bcbacdaa4 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/internal.h @@ -0,0 +1,48 @@ +#pragma once +#include "compiler.h" +#include "endian.h" +#include "ethash.h" + +#define ENABLE_SSE 1 + +#if defined(_M_X64) && ENABLE_SSE +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// compile time settings +#define NODE_WORDS (64/4) +#define MIX_WORDS (MIX_BYTES/4) +#define MIX_NODES (MIX_WORDS / NODE_WORDS) +#include + +typedef union node { + uint8_t bytes[NODE_WORDS * 4]; + uint32_t words[NODE_WORDS]; + uint64_t double_words[NODE_WORDS / 2]; + +#if defined(_M_X64) && ENABLE_SSE + __m128i xmm[NODE_WORDS/4]; +#endif + +} node; + +void ethash_calculate_dag_item( + node *const ret, + const unsigned node_index, + ethash_params const *params, + ethash_cache const *cache +); + +void ethash_quick_hash( + uint8_t return_hash[32], + const uint8_t header_hash[32], + const uint64_t nonce, + const uint8_t mix_hash[32]); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3.c b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3.c new file mode 100644 index 000000000..0c28230b8 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3.c @@ -0,0 +1,151 @@ +/** libkeccak-tiny +* +* A single-file implementation of SHA-3 and SHAKE. +* +* Implementor: David Leon Gil +* License: CC0, attribution kindly requested. Blame taken too, +* but not liability. +*/ +#include "sha3.h" + +#include +#include +#include +#include + +/******** The Keccak-f[1600] permutation ********/ + +/*** Constants. ***/ +static const uint8_t rho[24] = \ + { 1, 3, 6, 10, 15, 21, + 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, + 62, 18, 39, 61, 20, 44}; +static const uint8_t pi[24] = \ + {10, 7, 11, 17, 18, 3, + 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, + 20, 14, 22, 9, 6, 1}; +static const uint64_t RC[24] = \ + {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, + 0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, + 0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL, + 0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, + 0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL, + 0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL}; + +/*** Helper macros to unroll the permutation. ***/ +#define rol(x, s) (((x) << s) | ((x) >> (64 - s))) +#define REPEAT6(e) e e e e e e +#define REPEAT24(e) REPEAT6(e e e e) +#define REPEAT5(e) e e e e e +#define FOR5(v, s, e) \ + v = 0; \ + REPEAT5(e; v += s;) + +/*** Keccak-f[1600] ***/ +static inline void keccakf(void* state) { + uint64_t* a = (uint64_t*)state; + uint64_t b[5] = {0}; + uint64_t t = 0; + uint8_t x, y; + + for (int i = 0; i < 24; i++) { + // Theta + FOR5(x, 1, + b[x] = 0; + FOR5(y, 5, + b[x] ^= a[x + y]; )) + FOR5(x, 1, + FOR5(y, 5, + a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); )) + // Rho and pi + t = a[1]; + x = 0; + REPEAT24(b[0] = a[pi[x]]; + a[pi[x]] = rol(t, rho[x]); + t = b[0]; + x++; ) + // Chi + FOR5(y, + 5, + FOR5(x, 1, + b[x] = a[y + x];) + FOR5(x, 1, + a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); )) + // Iota + a[0] ^= RC[i]; + } +} + +/******** The FIPS202-defined functions. ********/ + +/*** Some helper macros. ***/ + +#define _(S) do { S } while (0) +#define FOR(i, ST, L, S) \ + _(for (size_t i = 0; i < L; i += ST) { S; }) +#define mkapply_ds(NAME, S) \ + static inline void NAME(uint8_t* dst, \ + const uint8_t* src, \ + size_t len) { \ + FOR(i, 1, len, S); \ + } +#define mkapply_sd(NAME, S) \ + static inline void NAME(const uint8_t* src, \ + uint8_t* dst, \ + size_t len) { \ + FOR(i, 1, len, S); \ + } + +mkapply_ds(xorin, dst[i] ^= src[i]) // xorin +mkapply_sd(setout, dst[i] = src[i]) // setout + +#define P keccakf +#define Plen 200 + +// Fold P*F over the full blocks of an input. +#define foldP(I, L, F) \ + while (L >= rate) { \ + F(a, I, rate); \ + P(a); \ + I += rate; \ + L -= rate; \ + } + +/** The sponge-based hash construction. **/ +static inline int hash(uint8_t* out, size_t outlen, + const uint8_t* in, size_t inlen, + size_t rate, uint8_t delim) { + if ((out == NULL) || ((in == NULL) && inlen != 0) || (rate >= Plen)) { + return -1; + } + uint8_t a[Plen] = {0}; + // Absorb input. + foldP(in, inlen, xorin); + // Xor in the DS and pad frame. + a[inlen] ^= delim; + a[rate - 1] ^= 0x80; + // Xor in the last block. + xorin(a, in, inlen); + // Apply P + P(a); + // Squeeze output. + foldP(out, outlen, setout); + setout(a, out, outlen); + memset(a, 0, 200); + return 0; +} + +#define defsha3(bits) \ + int sha3_##bits(uint8_t* out, size_t outlen, \ + const uint8_t* in, size_t inlen) { \ + if (outlen > (bits/8)) { \ + return -1; \ + } \ + return hash(out, outlen, in, inlen, 200 - (bits / 4), 0x01); \ + } + +/*** FIPS202 SHA3 FOFs ***/ +defsha3(256) +defsha3(512) \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3.h b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3.h new file mode 100644 index 000000000..36a0a5301 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3.h @@ -0,0 +1,27 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "compiler.h" +#include +#include + +#define decsha3(bits) \ + int sha3_##bits(uint8_t*, size_t, const uint8_t*, size_t); + +decsha3(256) +decsha3(512) + +static inline void SHA3_256(uint8_t * const ret, uint8_t const *data, const size_t size) { + sha3_256(ret, 32, data, size); +} + +static inline void SHA3_512(uint8_t * const ret, uint8_t const *data, const size_t size) { + sha3_512(ret, 64, data, size); +} + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3_cryptopp.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3_cryptopp.cpp new file mode 100644 index 000000000..9454ce04a --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3_cryptopp.cpp @@ -0,0 +1,34 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ + +/** @file sha3.cpp +* @author Tim Hughes +* @date 2015 +*/ + +#include +#include + +extern "C" { +void SHA3_256(uint8_t *const ret, const uint8_t *data, size_t size) { + CryptoPP::SHA3_256().CalculateDigest(ret, data, size); +} + +void SHA3_512(uint8_t *const ret, const uint8_t *data, size_t size) { + CryptoPP::SHA3_512().CalculateDigest(ret, data, size); +} +} \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3_cryptopp.h b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3_cryptopp.h new file mode 100644 index 000000000..f910960e1 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/sha3_cryptopp.h @@ -0,0 +1,15 @@ +#pragma once + +#include "compiler.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +void SHA3_256(uint8_t *const ret, const uint8_t *data, size_t size); +void SHA3_512(uint8_t *const ret, const uint8_t *data, size_t size); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/util.c b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/util.c new file mode 100644 index 000000000..fbf268b7d --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/util.c @@ -0,0 +1,41 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file util.c + * @author Tim Hughes + * @date 2015 + */ +#include +#include +#include "util.h" + +#ifdef _MSC_VER + +// foward declare without all of Windows.h +__declspec(dllimport) void __stdcall OutputDebugStringA(const char* lpOutputString); + +void debugf(const char *str, ...) +{ + va_list args; + va_start(args, str); + + char buf[1<<16]; + _vsnprintf_s(buf, sizeof(buf), sizeof(buf), str, args); + buf[sizeof(buf)-1] = '\0'; + OutputDebugStringA(buf); +} + +#endif diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/util.h b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/util.h new file mode 100644 index 000000000..2f59076f6 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/libethash/util.h @@ -0,0 +1,47 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file util.h + * @author Tim Hughes + * @date 2015 + */ +#pragma once +#include +#include "compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _MSC_VER +void debugf(const char *str, ...); +#else +#define debugf printf +#endif + +static inline uint32_t min_u32(uint32_t a, uint32_t b) +{ + return a < b ? a : b; +} + +static inline uint32_t clamp_u32(uint32_t x, uint32_t min_, uint32_t max_) +{ + return x < min_ ? min_ : (x > max_ ? max_ : x); +} + +#ifdef __cplusplus +} +#endif diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/src/python/core.c b/Godeps/_workspace/src/github.com/ethereum/ethash/src/python/core.c new file mode 100644 index 000000000..f7d8be156 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/src/python/core.c @@ -0,0 +1,64 @@ +#include +#include +#include +#include "../libethash/ethash.h" + +static PyObject* +get_cache_size(PyObject* self, PyObject* args) +{ + unsigned long block_number; + if (!PyArg_ParseTuple(args, "k", &block_number)) + return 0; + return Py_BuildValue("i", ethash_get_cachesize(block_number)); +} + +static PyObject* +get_full_size(PyObject* self, PyObject* args) +{ + unsigned long block_number; + if (!PyArg_ParseTuple(args, "k", &block_number)) + return 0; + return Py_BuildValue("i", ethash_get_datasize(block_number)); +} + + +static PyObject* +mkcache(PyObject* self, PyObject* args) +{ + char * seed; + unsigned long cache_size; + int seed_len; + + if (!PyArg_ParseTuple(args, "ks#", &cache_size, &seed, &seed_len)) + return 0; + + if (seed_len != 32) + { + PyErr_SetString(PyExc_ValueError, + "Seed must be 32 bytes long"); + return 0; + } + + printf("cache size: %lu\n", cache_size); + ethash_params params; + params.cache_size = (size_t) cache_size; + ethash_cache cache; + cache.mem = alloca(cache_size); + ethash_mkcache(&cache, ¶ms, (uint8_t *) seed); + return PyString_FromStringAndSize(cache.mem, cache_size); +} + + +static PyMethodDef CoreMethods[] = +{ + {"get_cache_size", get_cache_size, METH_VARARGS, "Get the cache size for a given block number"}, + {"get_full_size", get_full_size, METH_VARARGS, "Get the full size for a given block number"}, + {"mkcache", mkcache, METH_VARARGS, "Makes the cache for given parameters and seed hash"}, + {NULL, NULL, 0, NULL} +}; + +PyMODINIT_FUNC +initcore(void) +{ + (void) Py_InitModule("core", CoreMethods); +} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt deleted file mode 100644 index 431af6a99..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -IF( NOT Boost_FOUND ) - find_package(Boost COMPONENTS unit_test_framework) -ENDIF() - -IF( Boost_FOUND ) - include_directories( ${Boost_INCLUDE_DIR} ) - include_directories(..) - - link_directories ( ${Boost_LIBRARY_DIRS} ) - file(GLOB HEADERS "*.h") - ADD_DEFINITIONS(-DBOOST_TEST_DYN_LINK) - - if (NOT CRYPTOPP_FOUND) - find_package (CryptoPP) - endif() - - if (CRYPTOPP_FOUND) - add_definitions(-DWITH_CRYPTOPP) - endif() - - add_executable (Test test.cpp ${HEADERS}) - target_link_libraries (Test ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} ${ETHHASH_LIBS}) - - if (CRYPTOPP_FOUND) - TARGET_LINK_LIBRARIES(Test ${CRYPTOPP_LIBRARIES}) - endif() - - enable_testing () - add_test(NAME ethash COMMAND Test) -ENDIF() diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/c/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/test/c/CMakeLists.txt new file mode 100644 index 000000000..02e2aab91 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/c/CMakeLists.txt @@ -0,0 +1,30 @@ +IF( NOT Boost_FOUND ) + find_package(Boost COMPONENTS unit_test_framework) +ENDIF() + +IF( Boost_FOUND ) + include_directories( ${Boost_INCLUDE_DIR} ) + include_directories(../../src) + + link_directories ( ${Boost_LIBRARY_DIRS} ) + file(GLOB HEADERS "*.h") + ADD_DEFINITIONS(-DBOOST_TEST_DYN_LINK) + + if (NOT CRYPTOPP_FOUND) + find_package (CryptoPP) + endif() + + if (CRYPTOPP_FOUND) + add_definitions(-DWITH_CRYPTOPP) + endif() + + add_executable (Test test.cpp ${HEADERS}) + target_link_libraries (Test ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} ${ETHHASH_LIBS}) + + if (CRYPTOPP_FOUND) + TARGET_LINK_LIBRARIES(Test ${CRYPTOPP_LIBRARIES}) + endif() + + enable_testing () + add_test(NAME ethash COMMAND Test) +ENDIF() diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/c/test.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/test/c/test.cpp new file mode 100644 index 000000000..336713cb7 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/c/test.cpp @@ -0,0 +1,214 @@ +#include +#include +#include +#include + +#ifdef WITH_CRYPTOPP +#include +#else +#include +#endif // WITH_CRYPTOPP + +#define BOOST_TEST_MODULE Daggerhashimoto +#define BOOST_TEST_MAIN + +#include +#include + +std::string bytesToHexString(const uint8_t *str, const size_t s) { + std::ostringstream ret; + + for (int i = 0; i < s; ++i) + ret << std::hex << std::setfill('0') << std::setw(2) << std::nouppercase << (int) str[i]; + + return ret.str(); +} + +BOOST_AUTO_TEST_CASE(fnv_hash_check) { + uint32_t x = 1235U; + const uint32_t + y = 9999999U, + expected = (FNV_PRIME * x) ^ y; + + x = fnv_hash(x, y); + + BOOST_REQUIRE_MESSAGE(x == expected, + "\nexpected: " << expected << "\n" + << "actual: " << x << "\n"); + +} + +BOOST_AUTO_TEST_CASE(SHA256_check) { + uint8_t input[32], out[32]; + memcpy(input, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32); + SHA3_256(out, input, 32); + const std::string + expected = "2b5ddf6f4d21c23de216f44d5e4bdc68e044b71897837ea74c83908be7037cd7", + actual = bytesToHexString(out, 32); + BOOST_REQUIRE_MESSAGE(expected == actual, + "\nexpected: " << expected.c_str() << "\n" + << "actual: " << actual.c_str() << "\n"); +} + +BOOST_AUTO_TEST_CASE(SHA512_check) { + uint8_t input[64], out[64]; + memcpy(input, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 64); + SHA3_512(out, input, 64); + const std::string + expected = "0be8a1d334b4655fe58c6b38789f984bb13225684e86b20517a55ab2386c7b61c306f25e0627c60064cecd6d80cd67a82b3890bd1289b7ceb473aad56a359405", + actual = bytesToHexString(out, 64); + BOOST_REQUIRE_MESSAGE(expected == actual, + "\nexpected: " << expected.c_str() << "\n" + << "actual: " << actual.c_str() << "\n"); +} + +BOOST_AUTO_TEST_CASE(ethash_params_init_genesis_check) { + ethash_params params; + ethash_params_init(¶ms, 0); + BOOST_REQUIRE_MESSAGE(params.full_size < DAGSIZE_BYTES_INIT, + "\nfull size: " << params.full_size << "\n" + << "should be less than or equal to: " << DAGSIZE_BYTES_INIT << "\n"); + BOOST_REQUIRE_MESSAGE(params.full_size + 20*MIX_BYTES >= DAGSIZE_BYTES_INIT, + "\nfull size + 20*MIX_BYTES: " << params.full_size + 20*MIX_BYTES << "\n" + << "should be greater than or equal to: " << DAGSIZE_BYTES_INIT << "\n"); + BOOST_REQUIRE_MESSAGE(params.cache_size < DAGSIZE_BYTES_INIT / 32, + "\ncache size: " << params.cache_size << "\n" + << "should be less than or equal to: " << DAGSIZE_BYTES_INIT / 32 << "\n"); +} + +BOOST_AUTO_TEST_CASE(ethash_params_init_genesis_calcifide_check) { + ethash_params params; + ethash_params_init(¶ms, 0); + const uint32_t expected_full_size = 1073739904; + const uint32_t expected_cache_size = 1048384; + BOOST_REQUIRE_MESSAGE(params.full_size == expected_full_size, + "\nexpected: " << expected_cache_size << "\n" + << "actual: " << params.full_size << "\n"); + BOOST_REQUIRE_MESSAGE(params.cache_size == expected_cache_size, + "\nexpected: " << expected_cache_size << "\n" + << "actual: " << params.cache_size << "\n"); +} + +BOOST_AUTO_TEST_CASE(light_and_full_client_checks) { + ethash_params params; + uint8_t seed[32], hash[32]; + ethash_return_value light_out, full_out; + memcpy(seed, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32); + memcpy(hash, "~~~X~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32); + ethash_params_init(¶ms, 0); + params.cache_size = 1024; + params.full_size = 1024 * 32; + ethash_cache cache; + cache.mem = alloca(params.cache_size); + ethash_mkcache(&cache, ¶ms, seed); + node * full_mem = (node *) alloca(params.full_size); + ethash_compute_full_data(full_mem, ¶ms, &cache); + + { + const std::string + expected = "2da2b506f21070e1143d908e867962486d6b0a02e31d468fd5e3a7143aafa76a14201f63374314e2a6aaf84ad2eb57105dea3378378965a1b3873453bb2b78f9a8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995ca8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995ca8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995c259440b89fa3481c2c33171477c305c8e1e421f8d8f6d59585449d0034f3e421808d8da6bbd0b6378f567647cc6c4ba6c434592b198ad444e7284905b7c6adaf70bf43ec2daa7bd5e8951aa609ab472c124cf9eba3d38cff5091dc3f58409edcc386c743c3bd66f92408796ee1e82dd149eaefbf52b00ce33014a6eb3e50625413b072a58bc01da28262f42cbe4f87d4abc2bf287d15618405a1fe4e386fcdafbb171064bd99901d8f81dd6789396ce5e364ac944bbbd75a7827291c70b42d26385910cd53ca535ab29433dd5c5714d26e0dce95514c5ef866329c12e958097e84462197c2b32087849dab33e88b11da61d52f9dbc0b92cc61f742c07dbbf751c49d7678624ee60dfbe62e5e8c47a03d8247643f3d16ad8c8e663953bcda1f59d7e2d4a9bf0768e789432212621967a8f41121ad1df6ae1fa78782530695414c6213942865b2730375019105cae91a4c17a558d4b63059661d9f108362143107babe0b848de412e4da59168cce82bfbff3c99e022dd6ac1e559db991f2e3f7bb910cefd173e65ed00a8d5d416534e2c8416ff23977dbf3eb7180b75c71580d08ce95efeb9b0afe904ea12285a392aff0c8561ff79fca67f694a62b9e52377485c57cc3598d84cac0a9d27960de0cc31ff9bbfe455acaa62c8aa5d2cce96f345da9afe843d258a99c4eaf3650fc62efd81c7b81cd0d534d2d71eeda7a6e315d540b4473c80f8730037dc2ae3e47b986240cfc65ccc565f0d8cde0bc68a57e39a271dda57440b3598bee19f799611d25731a96b5dbbbefdff6f4f656161462633030d62560ea4e9c161cf78fc96a2ca5aaa32453a6c5dea206f766244e8c9d9a8dc61185ce37f1fc804459c5f07434f8ecb34141b8dcae7eae704c950b55556c5f40140c3714b45eddb02637513268778cbf937a33e4e33183685f9deb31ef54e90161e76d969587dd782eaa94e289420e7c2ee908517f5893a26fdb5873d68f92d118d4bcf98d7a4916794d6ab290045e30f9ea00ca547c584b8482b0331ba1539a0f2714fddc3a0b06b0cfbb6a607b8339c39bcfd6640b1f653e9d70ef6c985b", + actual = bytesToHexString((uint8_t const *) cache.mem, params.cache_size); + + BOOST_REQUIRE_MESSAGE(expected == actual, + "\nexpected: " << expected.c_str() << "\n" + << "actual: " << actual.c_str() << "\n"); + } + + + + { + node node; + ethash_calculate_dag_item(&node, 0, ¶ms, &cache); + const std::string + actual = bytesToHexString((uint8_t const *) &node, sizeof(node)), + expected = "b1698f829f90b35455804e5185d78f549fcb1bdce2bee006d4d7e68eb154b596be1427769eb1c3c3e93180c760af75f81d1023da6a0ffbe321c153a7c0103597"; + BOOST_REQUIRE_MESSAGE(actual == expected, + "\n" << "expected: " << expected.c_str() << "\n" + << "actual: " << actual.c_str() << "\n"); + } + + { + for (int i = 0 ; i < params.full_size / sizeof(node) ; ++i ) { + for (uint32_t j = 0; j < 32; ++j) { + node expected_node; + ethash_calculate_dag_item(&expected_node, j, ¶ms, &cache); + const std::string + actual = bytesToHexString((uint8_t const *) &(full_mem[j]), sizeof(node)), + expected = bytesToHexString((uint8_t const *) &expected_node, sizeof(node)); + BOOST_REQUIRE_MESSAGE(actual == expected, + "\ni: " << j << "\n" + << "expected: " << expected.c_str() << "\n" + << "actual: " << actual.c_str() << "\n"); + } + } + } + + { + uint64_t nonce = 0x7c7c597c; + ethash_full(&full_out, full_mem, ¶ms, hash, nonce); + ethash_light(&light_out, &cache, ¶ms, hash, nonce); + const std::string + light_result_string = bytesToHexString(light_out.result, 32), + full_result_string = bytesToHexString(full_out.result, 32); + BOOST_REQUIRE_MESSAGE(light_result_string == full_result_string, + "\nlight result: " << light_result_string.c_str() << "\n" + << "full result: " << full_result_string.c_str() << "\n"); + const std::string + light_mix_hash_string = bytesToHexString(light_out.mix_hash, 32), + full_mix_hash_string = bytesToHexString(full_out.mix_hash, 32); + BOOST_REQUIRE_MESSAGE(full_mix_hash_string == light_mix_hash_string, + "\nlight mix hash: " << light_mix_hash_string.c_str() << "\n" + << "full mix hash: " << full_mix_hash_string.c_str() << "\n"); + uint8_t check_hash[32]; + ethash_quick_hash(check_hash, hash, nonce, full_out.mix_hash); + const std::string check_hash_string = bytesToHexString(check_hash, 32); + BOOST_REQUIRE_MESSAGE(check_hash_string == full_result_string, + "\ncheck hash string: " << check_hash_string.c_str() << "\n" + << "full result: " << full_result_string.c_str() << "\n"); + } + { + ethash_full(&full_out, full_mem, ¶ms, hash, 5); + std::string + light_result_string = bytesToHexString(light_out.result, 32), + full_result_string = bytesToHexString(full_out.result, 32); + + BOOST_REQUIRE_MESSAGE(light_result_string != full_result_string, + "\nlight result and full result should differ: " << light_result_string.c_str() << "\n"); + + ethash_light(&light_out, &cache, ¶ms, hash, 5); + light_result_string = bytesToHexString(light_out.result, 32); + BOOST_REQUIRE_MESSAGE(light_result_string == full_result_string, + "\nlight result and full result should be the same\n" + << "light result: " << light_result_string.c_str() << "\n" + << "full result: " << full_result_string.c_str() << "\n"); + std::string + light_mix_hash_string = bytesToHexString(light_out.mix_hash, 32), + full_mix_hash_string = bytesToHexString(full_out.mix_hash, 32); + BOOST_REQUIRE_MESSAGE(full_mix_hash_string == light_mix_hash_string, + "\nlight mix hash: " << light_mix_hash_string.c_str() << "\n" + << "full mix hash: " << full_mix_hash_string.c_str() << "\n"); + } +} + +BOOST_AUTO_TEST_CASE(ethash_check_difficulty_check) { + uint8_t hash[32], target[32]; + memset(hash, 0, 32); + memset(target, 0, 32); + + memcpy(hash, "11111111111111111111111111111111", 32); + memcpy(target, "22222222222222222222222222222222", 32); + BOOST_REQUIRE_MESSAGE( + ethash_check_difficulty(hash, target), + "\nexpected \"" << hash << "\" to have less difficulty than \"" << target << "\"\n"); + BOOST_REQUIRE_MESSAGE( + !ethash_check_difficulty(hash, hash), + "\nexpected \"" << hash << "\" to have the same difficulty as \"" << hash << "\"\n"); + memcpy(target, "11111111111111111111111111111112", 32); + BOOST_REQUIRE_MESSAGE( + ethash_check_difficulty(hash, target), + "\nexpected \"" << hash << "\" to have less difficulty than \"" << target << "\"\n"); + memcpy(target, "11111111111111111111111111111110", 32); + BOOST_REQUIRE_MESSAGE( + !ethash_check_difficulty(hash, target), + "\nexpected \"" << hash << "\" to have more difficulty than \"" << target << "\"\n"); +} \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/c/test.sh b/Godeps/_workspace/src/github.com/ethereum/ethash/test/c/test.sh new file mode 100644 index 000000000..6d02d30f8 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/c/test.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Strict mode +set -e + +SOURCE="${BASH_SOURCE[0]}" +while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" +done +TEST_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" + +rm -rf $TEST_DIR/build +mkdir -p $TEST_DIR/build +cd $TEST_DIR/build ; +cmake ../../.. > /dev/null +make Test +./test/c/Test diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go b/Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go deleted file mode 100644 index d734954e1..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go +++ /dev/null @@ -1,54 +0,0 @@ -package ethashTest - -import ( - "bytes" - "crypto/rand" - "log" - "math/big" - "testing" - - "github.com/ethereum/ethash" - "github.com/ethereum/go-ethereum/core" - "github.com/ethereum/go-ethereum/ethdb" -) - -func TestEthash(t *testing.T) { - seedHash := make([]byte, 32) - _, err := rand.Read(seedHash) - if err != nil { - panic(err) - } - - db, err := ethdb.NewMemDatabase() - if err != nil { - panic(err) - } - - blockProcessor, err := core.NewCanonical(5, db) - if err != nil { - panic(err) - } - - log.Println("Block Number: ", blockProcessor.ChainManager().CurrentBlock().Number()) - - e := ethash.New(blockProcessor.ChainManager()) - - miningHash := make([]byte, 32) - if _, err := rand.Read(miningHash); err != nil { - panic(err) - } - diff := big.NewInt(10000) - log.Println("difficulty", diff) - - nonce := uint64(0) - - ghash_full := e.FullHash(nonce, miningHash) - log.Printf("ethash full (on nonce): %x %x\n", ghash_full, nonce) - - ghash_light := e.LightHash(nonce, miningHash) - log.Printf("ethash light (on nonce): %x %x\n", ghash_light, nonce) - - if bytes.Compare(ghash_full, ghash_light) != 0 { - t.Errorf("full: %x, light: %x", ghash_full, ghash_light) - } -} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/python/.gitignore b/Godeps/_workspace/src/github.com/ethereum/ethash/test/python/.gitignore new file mode 100644 index 000000000..c304fd615 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/python/.gitignore @@ -0,0 +1 @@ +python-virtual-env/ diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/python/requirements.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/test/python/requirements.txt new file mode 100644 index 000000000..1f38dc3c7 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/python/requirements.txt @@ -0,0 +1,2 @@ +pyethereum==0.7.522 +nose==1.3.4 diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/python/test.sh b/Godeps/_workspace/src/github.com/ethereum/ethash/test/python/test.sh new file mode 100644 index 000000000..4a547d157 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/python/test.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Strict mode +set -e + +SOURCE="${BASH_SOURCE[0]}" +while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" +done +TEST_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" + +[ -d $TEST_DIR/python-virtual-env ] || virtualenv --system-site-packages $TEST_DIR/python-virtual-env +source $TEST_DIR/python-virtual-env/bin/activate +pip install -r $TEST_DIR/requirements.txt > /dev/null +pip install -e $TEST_DIR/../.. > /dev/null +cd $TEST_DIR +nosetests --with-doctest -v diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/python/test_pyethash.py b/Godeps/_workspace/src/github.com/ethereum/ethash/test/python/test_pyethash.py new file mode 100644 index 000000000..ca9321e92 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/python/test_pyethash.py @@ -0,0 +1,45 @@ +import pyethash +from random import randint + +def test_get_cache_size_not_None(): + for _ in range(100): + block_num = randint(0,12456789) + out = pyethash.core.get_cache_size(block_num) + assert out != None + +def test_get_full_size_not_None(): + for _ in range(100): + block_num = randint(0,12456789) + out = pyethash.core.get_full_size(block_num) + assert out != None + +def test_get_cache_size_based_on_EPOCH(): + for _ in range(100): + block_num = randint(0,12456789) + out1 = pyethash.core.get_cache_size(block_num) + out2 = pyethash.core.get_cache_size((block_num // pyethash.EPOCH_LENGTH) * pyethash.EPOCH_LENGTH) + assert out1 == out2 + +def test_get_full_size_based_on_EPOCH(): + for _ in range(100): + block_num = randint(0,12456789) + out1 = pyethash.core.get_full_size(block_num) + out2 = pyethash.core.get_full_size((block_num // pyethash.EPOCH_LENGTH) * pyethash.EPOCH_LENGTH) + assert out1 == out2 + +#def test_get_params_based_on_EPOCH(): +# block_num = 123456 +# out1 = pyethash.core.get_params(block_num) +# out2 = pyethash.core.get_params((block_num // pyethash.EPOCH_LENGTH) * pyethash.EPOCH_LENGTH) +# assert out1["DAG Size"] == out2["DAG Size"] +# assert out1["Cache Size"] == out2["Cache Size"] +# +#def test_get_params_returns_different_values_based_on_different_block_input(): +# out1 = pyethash.core.get_params(123456) +# out2 = pyethash.core.get_params(12345) +# assert out1["DAG Size"] != out2["DAG Size"] +# assert out1["Cache Size"] != out2["Cache Size"] +# +#def test_get_cache_smoke_test(): +# params = pyethash.core.get_params(123456) +# assert pyethash.core.mkcache(params, "~~~~") != None diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp deleted file mode 100644 index bec44e138..000000000 --- a/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp +++ /dev/null @@ -1,233 +0,0 @@ -#include -#include -#include -#include - -#ifdef WITH_CRYPTOPP -#include -#else -#include -#endif // WITH_CRYPTOPP - -#define BOOST_TEST_MODULE Daggerhashimoto -#define BOOST_TEST_MAIN - -#include -#include -#include - -std::string bytesToHexString(const uint8_t *str, const size_t s) { - std::ostringstream ret; - - for (int i = 0; i < s; ++i) - ret << std::hex << std::setfill('0') << std::setw(2) << std::nouppercase << (int) str[i]; - - return ret.str(); -} - -BOOST_AUTO_TEST_CASE(fnv_hash_check) { - uint32_t x = 1235U; - const uint32_t - y = 9999999U, - expected = (FNV_PRIME * x) ^ y; - - x = fnv_hash(x, y); - - BOOST_REQUIRE_MESSAGE(x == expected, - "\nexpected: " << expected << "\n" - << "actual: " << x << "\n"); - -} - -BOOST_AUTO_TEST_CASE(SHA256_check) { - uint8_t input[32], out[32]; - memcpy(input, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32); - SHA3_256(out, input, 32); - const std::string - expected = "2b5ddf6f4d21c23de216f44d5e4bdc68e044b71897837ea74c83908be7037cd7", - actual = bytesToHexString(out, 32); - BOOST_REQUIRE_MESSAGE(expected == actual, - "\nexpected: " << expected.c_str() << "\n" - << "actual: " << actual.c_str() << "\n"); -} - -BOOST_AUTO_TEST_CASE(SHA512_check) { - uint8_t input[64], out[64]; - memcpy(input, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 64); - SHA3_512(out, input, 64); - const std::string - expected = "0be8a1d334b4655fe58c6b38789f984bb13225684e86b20517a55ab2386c7b61c306f25e0627c60064cecd6d80cd67a82b3890bd1289b7ceb473aad56a359405", - actual = bytesToHexString(out, 64); - BOOST_REQUIRE_MESSAGE(expected == actual, - "\nexpected: " << expected.c_str() << "\n" - << "actual: " << actual.c_str() << "\n"); -} - -BOOST_AUTO_TEST_CASE(ethash_params_init_genesis_check) { - ethash_params params; - ethash_params_init(¶ms, 0); - BOOST_REQUIRE_MESSAGE(params.full_size < DAGSIZE_BYTES_INIT, - "\nfull size: " << params.full_size << "\n" - << "should be less than or equal to: " << DAGSIZE_BYTES_INIT << "\n"); - BOOST_REQUIRE_MESSAGE(params.full_size + 20*MIX_BYTES >= DAGSIZE_BYTES_INIT, - "\nfull size + 20*MIX_BYTES: " << params.full_size + 20*MIX_BYTES << "\n" - << "should be greater than or equal to: " << DAGSIZE_BYTES_INIT << "\n"); - BOOST_REQUIRE_MESSAGE(params.cache_size < DAGSIZE_BYTES_INIT / 32, - "\ncache size: " << params.cache_size << "\n" - << "should be less than or equal to: " << DAGSIZE_BYTES_INIT / 32 << "\n"); -} - -BOOST_AUTO_TEST_CASE(ethash_params_init_genesis_calcifide_check) { - ethash_params params; - ethash_params_init(¶ms, 0); - const uint32_t expected_full_size = 1073739904; - const uint32_t expected_cache_size = 1048384; - BOOST_REQUIRE_MESSAGE(params.full_size == expected_full_size, - "\nexpected: " << expected_cache_size << "\n" - << "actual: " << params.full_size << "\n"); - BOOST_REQUIRE_MESSAGE(params.cache_size == expected_cache_size, - "\nexpected: " << expected_cache_size << "\n" - << "actual: " << params.cache_size << "\n"); -} - -BOOST_AUTO_TEST_CASE(ethash_params_init_check) { - ethash_params params; - ethash_params_init(¶ms, 1971000); - const uint64_t nine_month_size = (uint64_t) 8*DAGSIZE_BYTES_INIT; - BOOST_REQUIRE_MESSAGE(params.full_size < nine_month_size, - "\nfull size: " << params.full_size << "\n" - << "should be less than or equal to: " << nine_month_size << "\n"); - BOOST_REQUIRE_MESSAGE(params.full_size + DAGSIZE_BYTES_INIT / 4 > nine_month_size, - "\nfull size + DAGSIZE_BYTES_INIT / 4: " << params.full_size + DAGSIZE_BYTES_INIT / 4 << "\n" - << "should be greater than or equal to: " << nine_month_size << "\n"); - BOOST_REQUIRE_MESSAGE(params.cache_size < nine_month_size / 1024, - "\nactual cache size: " << params.cache_size << "\n" - << "expected: " << nine_month_size / 1024 << "\n"); - BOOST_REQUIRE_MESSAGE(params.cache_size + DAGSIZE_BYTES_INIT / 4 / 1024 > nine_month_size / 1024 , - "\ncache size + DAGSIZE_BYTES_INIT / 4 / 1024: " << params.cache_size + DAGSIZE_BYTES_INIT / 4 / 1024 << "\n" - << "actual: " << nine_month_size / 32 << "\n"); -} - -BOOST_AUTO_TEST_CASE(light_and_full_client_checks) { - ethash_params params; - uint8_t seed[32], hash[32]; - ethash_return_value light_out, full_out; - memcpy(seed, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32); - memcpy(hash, "~~~X~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32); - ethash_params_init(¶ms, 0); - params.cache_size = 1024; - params.full_size = 1024 * 32; - ethash_cache cache; - cache.mem = alloca(params.cache_size); - ethash_mkcache(&cache, ¶ms, seed); - node * full_mem = (node *) alloca(params.full_size); - ethash_compute_full_data(full_mem, ¶ms, &cache); - - { - const std::string - expected = "2da2b506f21070e1143d908e867962486d6b0a02e31d468fd5e3a7143aafa76a14201f63374314e2a6aaf84ad2eb57105dea3378378965a1b3873453bb2b78f9a8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995ca8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995ca8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995c259440b89fa3481c2c33171477c305c8e1e421f8d8f6d59585449d0034f3e421808d8da6bbd0b6378f567647cc6c4ba6c434592b198ad444e7284905b7c6adaf70bf43ec2daa7bd5e8951aa609ab472c124cf9eba3d38cff5091dc3f58409edcc386c743c3bd66f92408796ee1e82dd149eaefbf52b00ce33014a6eb3e50625413b072a58bc01da28262f42cbe4f87d4abc2bf287d15618405a1fe4e386fcdafbb171064bd99901d8f81dd6789396ce5e364ac944bbbd75a7827291c70b42d26385910cd53ca535ab29433dd5c5714d26e0dce95514c5ef866329c12e958097e84462197c2b32087849dab33e88b11da61d52f9dbc0b92cc61f742c07dbbf751c49d7678624ee60dfbe62e5e8c47a03d8247643f3d16ad8c8e663953bcda1f59d7e2d4a9bf0768e789432212621967a8f41121ad1df6ae1fa78782530695414c6213942865b2730375019105cae91a4c17a558d4b63059661d9f108362143107babe0b848de412e4da59168cce82bfbff3c99e022dd6ac1e559db991f2e3f7bb910cefd173e65ed00a8d5d416534e2c8416ff23977dbf3eb7180b75c71580d08ce95efeb9b0afe904ea12285a392aff0c8561ff79fca67f694a62b9e52377485c57cc3598d84cac0a9d27960de0cc31ff9bbfe455acaa62c8aa5d2cce96f345da9afe843d258a99c4eaf3650fc62efd81c7b81cd0d534d2d71eeda7a6e315d540b4473c80f8730037dc2ae3e47b986240cfc65ccc565f0d8cde0bc68a57e39a271dda57440b3598bee19f799611d25731a96b5dbbbefdff6f4f656161462633030d62560ea4e9c161cf78fc96a2ca5aaa32453a6c5dea206f766244e8c9d9a8dc61185ce37f1fc804459c5f07434f8ecb34141b8dcae7eae704c950b55556c5f40140c3714b45eddb02637513268778cbf937a33e4e33183685f9deb31ef54e90161e76d969587dd782eaa94e289420e7c2ee908517f5893a26fdb5873d68f92d118d4bcf98d7a4916794d6ab290045e30f9ea00ca547c584b8482b0331ba1539a0f2714fddc3a0b06b0cfbb6a607b8339c39bcfd6640b1f653e9d70ef6c985b", - actual = bytesToHexString((uint8_t const *) cache.mem, params.cache_size); - - BOOST_REQUIRE_MESSAGE(expected == actual, - "\nexpected: " << expected.c_str() << "\n" - << "actual: " << actual.c_str() << "\n"); - } - - - - { - node node; - ethash_calculate_dag_item(&node, 0, ¶ms, &cache); - const std::string - actual = bytesToHexString((uint8_t const *) &node, sizeof(node)), - expected = "b1698f829f90b35455804e5185d78f549fcb1bdce2bee006d4d7e68eb154b596be1427769eb1c3c3e93180c760af75f81d1023da6a0ffbe321c153a7c0103597"; - BOOST_REQUIRE_MESSAGE(actual == expected, - "\n" << "expected: " << expected.c_str() << "\n" - << "actual: " << actual.c_str() << "\n"); - } - - { - for (int i = 0 ; i < params.full_size / sizeof(node) ; ++i ) { - for (uint32_t j = 0; j < 32; ++j) { - node expected_node; - ethash_calculate_dag_item(&expected_node, j, ¶ms, &cache); - const std::string - actual = bytesToHexString((uint8_t const *) &(full_mem[j]), sizeof(node)), - expected = bytesToHexString((uint8_t const *) &expected_node, sizeof(node)); - BOOST_REQUIRE_MESSAGE(actual == expected, - "\ni: " << j << "\n" - << "expected: " << expected.c_str() << "\n" - << "actual: " << actual.c_str() << "\n"); - } - } - } - - { - uint64_t nonce = 0x7c7c597c; - ethash_full(&full_out, full_mem, ¶ms, hash, nonce); - ethash_light(&light_out, &cache, ¶ms, hash, nonce); - const std::string - light_result_string = bytesToHexString(light_out.result, 32), - full_result_string = bytesToHexString(full_out.result, 32); - BOOST_REQUIRE_MESSAGE(light_result_string == full_result_string, - "\nlight result: " << light_result_string.c_str() << "\n" - << "full result: " << full_result_string.c_str() << "\n"); - const std::string - light_mix_hash_string = bytesToHexString(light_out.mix_hash, 32), - full_mix_hash_string = bytesToHexString(full_out.mix_hash, 32); - BOOST_REQUIRE_MESSAGE(full_mix_hash_string == light_mix_hash_string, - "\nlight mix hash: " << light_mix_hash_string.c_str() << "\n" - << "full mix hash: " << full_mix_hash_string.c_str() << "\n"); - uint8_t check_hash[32]; - ethash_quick_hash(check_hash, hash, nonce, full_out.mix_hash); - const std::string check_hash_string = bytesToHexString(check_hash, 32); - BOOST_REQUIRE_MESSAGE(check_hash_string == full_result_string, - "\ncheck hash string: " << check_hash_string.c_str() << "\n" - << "full result: " << full_result_string.c_str() << "\n"); - } - { - ethash_full(&full_out, full_mem, ¶ms, hash, 5); - std::string - light_result_string = bytesToHexString(light_out.result, 32), - full_result_string = bytesToHexString(full_out.result, 32); - - BOOST_REQUIRE_MESSAGE(light_result_string != full_result_string, - "\nlight result and full result should differ: " << light_result_string.c_str() << "\n"); - - ethash_light(&light_out, &cache, ¶ms, hash, 5); - light_result_string = bytesToHexString(light_out.result, 32); - BOOST_REQUIRE_MESSAGE(light_result_string == full_result_string, - "\nlight result and full result should be the same\n" - << "light result: " << light_result_string.c_str() << "\n" - << "full result: " << full_result_string.c_str() << "\n"); - std::string - light_mix_hash_string = bytesToHexString(light_out.mix_hash, 32), - full_mix_hash_string = bytesToHexString(full_out.mix_hash, 32); - BOOST_REQUIRE_MESSAGE(full_mix_hash_string == light_mix_hash_string, - "\nlight mix hash: " << light_mix_hash_string.c_str() << "\n" - << "full mix hash: " << full_mix_hash_string.c_str() << "\n"); - } -} - -BOOST_AUTO_TEST_CASE(ethash_check_difficulty_check) { - uint8_t hash[32], target[32]; - memset(hash, 0, 32); - memset(target, 0, 32); - - memcpy(hash, "11111111111111111111111111111111", 32); - memcpy(target, "22222222222222222222222222222222", 32); - BOOST_REQUIRE_MESSAGE( - ethash_check_difficulty(hash, target), - "\nexpected \"" << hash << "\" to have less difficulty than \"" << target << "\"\n"); - BOOST_REQUIRE_MESSAGE( - !ethash_check_difficulty(hash, hash), - "\nexpected \"" << hash << "\" to have the same difficulty as \"" << hash << "\"\n"); - memcpy(target, "11111111111111111111111111111112", 32); - BOOST_REQUIRE_MESSAGE( - ethash_check_difficulty(hash, target), - "\nexpected \"" << hash << "\" to have less difficulty than \"" << target << "\"\n"); - memcpy(target, "11111111111111111111111111111110", 32); - BOOST_REQUIRE_MESSAGE( - !ethash_check_difficulty(hash, target), - "\nexpected \"" << hash << "\" to have more difficulty than \"" << target << "\"\n"); -} \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.sh b/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.sh new file mode 100644 index 000000000..eb38f6dfc --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Strict mode +set -e + +SOURCE="${BASH_SOURCE[0]}" +while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" +done +TEST_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" + +echo -e "\n################# Testing JS ##################" +# TODO: Use mocha and real testing tools instead of rolling our own +cd $TEST_DIR/../js +node test.js + +echo -e "\n################# Testing C ##################" +$TEST_DIR/c/test.sh + +echo -e "\n################# Testing Python ##################" +$TEST_DIR/python/test.sh + +#echo "################# Testing Go ##################" +#$TEST_DIR/go/test.sh -- cgit