#include <cstdint>
#include <iostream>
#define EIGEN_USE_SYCL
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;

int main()
{
  using DataType = float;
  using IndexType = int64_t;
  constexpr auto DataLayout = Eigen::RowMajor;

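  // Pick the first SYCL device that Eigen reports and wrap its queue in a
  // SyclDevice; everything assigned through .device(sycl_device) below runs on
  // that queue. Note: on a machine without a SYCL backend the device list can
  // be empty, so a robust program would check before dereferencing devices.begin().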
  auto devices = Eigen::get_sycl_supported_devices();
  const auto device_selector = *devices.begin();
  Eigen::QueueInterface queueInterface(device_selector);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);

  // dimensions of the tensors used in the operation
  IndexType sizeDim1 = 3;
  IndexType sizeDim2 = 3;
  IndexType sizeDim3 = 3;
  array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};

  // host-side tensors that will hold the data we want to manipulate
  Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange);
  Tensor<DataType, 3, DataLayout, IndexType> in2(tensorRange);
  Tensor<DataType, 3, DataLayout, IndexType> out(tensorRange);

  // set up some random data in the tensors to be multiplied
  in1 = in1.random();
  in2 = in2.random();

  // allocate device memory for the tensors
  DataType* gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.size() * sizeof(DataType)));
  DataType* gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(in2.size() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.size() * sizeof(DataType)));

  // wrap the raw device pointers in TensorMaps so they can be used in Eigen
  // expressions; a TensorMap does not own or free the underlying buffer
  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, tensorRange);
  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, tensorRange);
  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_out(gpu_out_data, tensorRange);

  // copy the inputs to the device and do the element-wise c = a * b calculation there
  sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(), in1.size() * sizeof(DataType));
  sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(), in2.size() * sizeof(DataType));
  gpu_out.device(sycl_device) = gpu_in1 * gpu_in2;
  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, out.size() * sizeof(DataType));
  // block until the queued copies and the kernel have completed
  sycl_device.synchronize();

  // print the device results next to the same products computed on the host
  for (IndexType i = 0; i < sizeDim1; ++i) {
    for (IndexType j = 0; j < sizeDim2; ++j) {
      for (IndexType k = 0; k < sizeDim3; ++k) {
        std::cout << "device_out" << "(" << i << ", " << j << ", " << k << ") : " << out(i, j, k)
                  << " vs host_out" << "(" << i << ", " << j << ", " << k << ") : " << in1(i, j, k) * in2(i, j, k) << "\n";
      }
    }
  }
  std::cout << "c=a*b Done\n";
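
  // Release the device buffers; SyclDevice::deallocate() is the counterpart of
  // the allocate() calls used above to create them.
  sycl_device.deallocate(gpu_in1_data);
  sycl_device.deallocate(gpu_in2_data);
  sycl_device.deallocate(gpu_out_data);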
}