Pull request #6958 updated 13:12:50 Connecting to https://api.github.com using 476720/****** Obtained .jenkins from 40790a3d4d9917d4f4b4547756392b0810e27f60+cc602957cec5627c8752137c0412caa6fe37d2bc (e94640bbd4f691e847a9877f2227e41b7c5864c3) [Pipeline] Start of Pipeline [Pipeline] withEnv [Pipeline] { [Pipeline] timeout Timeout set to expire in 6 hr 0 min [Pipeline] { [Pipeline] stage [Pipeline] { (Clang-Format) [Pipeline] node Running on waffle03 in /var/jenkins/workspace/Kokkos_PR-6958 [Pipeline] { [Pipeline] checkout Selected Git installation does not exist. Using Default The recommended git tool is: NONE using credential Jenkins ORNL Cloning the remote Git repository Cloning with configured refspecs honoured and without tags Cloning repository https://github.com/kokkos/kokkos.git > git init /var/jenkins/workspace/Kokkos_PR-6958 # timeout=10 Fetching upstream changes from https://github.com/kokkos/kokkos.git > git --version # timeout=10 > git --version # 'git version 2.17.1' using GIT_ASKPASS to set credentials > git fetch --no-tags --progress -- https://github.com/kokkos/kokkos.git +refs/pull/6958/head:refs/remotes/origin/PR-6958 +refs/heads/develop:refs/remotes/origin/develop # timeout=10 Fetching without tags > git config remote.origin.url https://github.com/kokkos/kokkos.git # timeout=10 > git config --add remote.origin.fetch +refs/pull/6958/head:refs/remotes/origin/PR-6958 # timeout=10 > git config --add remote.origin.fetch +refs/heads/develop:refs/remotes/origin/develop # timeout=10 > git config remote.origin.url https://github.com/kokkos/kokkos.git # timeout=10 Fetching upstream changes from https://github.com/kokkos/kokkos.git using GIT_ASKPASS to set credentials > git fetch --no-tags --progress -- https://github.com/kokkos/kokkos.git +refs/pull/6958/head:refs/remotes/origin/PR-6958 +refs/heads/develop:refs/remotes/origin/develop # timeout=10 Merging remotes/origin/develop commit cc602957cec5627c8752137c0412caa6fe37d2bc into PR head commit 40790a3d4d9917d4f4b4547756392b0810e27f60 Merge succeeded, producing 1dc4c792885b390ff8c85464779deb95e2e6a6f2 Checking out Revision 1dc4c792885b390ff8c85464779deb95e2e6a6f2 (PR-6958) Commit message: "Merge commit 'cc602957cec5627c8752137c0412caa6fe37d2bc' into HEAD" First time build. Skipping changelog. > git config core.sparsecheckout # timeout=10 > git checkout -f 40790a3d4d9917d4f4b4547756392b0810e27f60 # timeout=10 > git remote # timeout=10 > git config --get remote.origin.url # timeout=10 using GIT_ASKPASS to set credentials > git merge cc602957cec5627c8752137c0412caa6fe37d2bc # timeout=10 > git rev-parse HEAD^{commit} # timeout=10 > git config core.sparsecheckout # timeout=10 > git checkout -f 1dc4c792885b390ff8c85464779deb95e2e6a6f2 # timeout=10 > git rev-list --no-walk d1cac1d5c46eb80a84381d440b761aae13a6337f # timeout=10 [Pipeline] withEnv [Pipeline] { [Pipeline] isUnix [Pipeline] readFile [Pipeline] sh + docker build -t cf4be35b9ecf4aa955bec3e9708c87821099b24d -f scripts/docker/Dockerfile.clang scripts/docker DEPRECATED: The legacy builder is deprecated and will be removed in a future release. Install the buildx component to build images with BuildKit: https://docs.docker.com/go/buildx/ Sending build context to Docker daemon 22.02kB Step 1/3 : FROM ubuntu:20.04 ---> f78909c2b360 Step 2/3 : RUN apt-get update && apt-get install -y bc git build-essential clang-format-8 wget && apt-get clean && rm -rf /var/lib/apt/lists/* ---> Using cache ---> 722ed8e6ecaf Step 3/3 : ENV CLANG_FORMAT_EXE=clang-format-8 ---> Using cache ---> b0f5cdcaf30d Successfully built b0f5cdcaf30d Successfully tagged cf4be35b9ecf4aa955bec3e9708c87821099b24d:latest [Pipeline] isUnix [Pipeline] withEnv [Pipeline] { [Pipeline] sh + docker inspect -f . cf4be35b9ecf4aa955bec3e9708c87821099b24d . [Pipeline] } [Pipeline] // withEnv [Pipeline] withDockerContainer waffle03 seems to be running inside container bf46c7fb29181f4c351c4ae7a94cec4ae2435125f11e128cc828b0229bd51391 $ docker run -t -d -u 0:0 -v /tmp/ccache.kokkos:/tmp/ccache -w /var/jenkins/workspace/Kokkos_PR-6958 --volumes-from bf46c7fb29181f4c351c4ae7a94cec4ae2435125f11e128cc828b0229bd51391 -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** -e ******** cf4be35b9ecf4aa955bec3e9708c87821099b24d cat $ docker top ad4d50a256f6b42b2cf57e26aac5a0bffbcb5b0a4cb4bc09167ff7af647be233 -eo pid,comm [Pipeline] { [Pipeline] sh + ./scripts/docker/check_format_cpp.sh diff --git a/core/src/SYCL/Kokkos_SYCL_Instance.cpp b/core/src/SYCL/Kokkos_SYCL_Instance.cpp index cc391855a..adfd4c10b 100644 --- a/core/src/SYCL/Kokkos_SYCL_Instance.cpp +++ b/core/src/SYCL/Kokkos_SYCL_Instance.cpp @@ -166,8 +166,9 @@ int SYCLInternal::acquire_team_scratch_space() { return current_team_scratch; } -Kokkos::Impl::SYCLTypes::device_ptr<void> SYCLInternal::resize_team_scratch_space( - int scratch_pool_id, std::int64_t bytes, bool force_shrink) { +Kokkos::Impl::SYCLTypes::device_ptr<void> +SYCLInternal::resize_team_scratch_space(int scratch_pool_id, std::int64_t bytes, + bool force_shrink) { // Multiple ParallelFor/Reduce Teams can call this function at the same time // and invalidate the m_team_scratch_ptr. We use a pool to avoid any race // condition. diff --git a/core/src/SYCL/Kokkos_SYCL_Instance.hpp b/core/src/SYCL/Kokkos_SYCL_Instance.hpp index 17ff791d2..de77b8efd 100644 --- a/core/src/SYCL/Kokkos_SYCL_Instance.hpp +++ b/core/src/SYCL/Kokkos_SYCL_Instance.hpp @@ -43,8 +43,10 @@ class SYCLInternal { SYCLInternal& operator=(SYCLInternal&&) = delete; SYCLInternal(SYCLInternal&&) = delete; - Kokkos::Impl::SYCLTypes::device_ptr<void> scratch_space(const std::size_t size); - Kokkos::Impl::SYCLTypes::device_ptr<void> scratch_flags(const std::size_t size); + Kokkos::Impl::SYCLTypes::device_ptr<void> scratch_space( + const std::size_t size); + Kokkos::Impl::SYCLTypes::device_ptr<void> scratch_flags( + const std::size_t size); Kokkos::Impl::SYCLTypes::host_ptr<void> scratch_host(const std::size_t size); int acquire_team_scratch_space(); Kokkos::Impl::SYCLTypes::device_ptr<void> resize_team_scratch_space( @@ -58,11 +60,11 @@ class SYCLInternal { uint32_t m_maxConcurrency = 0; uint64_t m_maxShmemPerBlock = 0; - std::size_t m_scratchSpaceCount = 0; + std::size_t m_scratchSpaceCount = 0; Kokkos::Impl::SYCLTypes::device_ptr<size_type> m_scratchSpace = nullptr; - std::size_t m_scratchHostCount = 0; + std::size_t m_scratchHostCount = 0; Kokkos::Impl::SYCLTypes::host_ptr<size_type> m_scratchHost = nullptr; - std::size_t m_scratchFlagsCount = 0; + std::size_t m_scratchFlagsCount = 0; Kokkos::Impl::SYCLTypes::device_ptr<size_type> m_scratchFlags = nullptr; // mutex to access shared memory mutable std::mutex m_mutexScratchSpace; diff --git a/core/src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp b/core/src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp index b5837b5e0..7f258eccc 100644 --- a/core/src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp +++ b/core/src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp @@ -162,12 +162,13 @@ class Kokkos::Impl::ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, // Functor's reduce memory, team scan memory, and team shared memory depend // upon team size. - auto& space = *m_policy.space().impl_internal_space_instance(); - m_scratch_pool_id = space.acquire_team_scratch_space(); - m_global_scratch_ptr = static_cast<Kokkos::Impl::SYCLTypes::device_ptr<char>>( - space.resize_team_scratch_space( - m_scratch_pool_id, - static_cast<ptrdiff_t>(m_scratch_size[1]) * m_league_size)); + auto& space = *m_policy.space().impl_internal_space_instance(); + m_scratch_pool_id = space.acquire_team_scratch_space(); + m_global_scratch_ptr = + static_cast<Kokkos::Impl::SYCLTypes::device_ptr<char>>( + space.resize_team_scratch_space( + m_scratch_pool_id, + static_cast<ptrdiff_t>(m_scratch_size[1]) * m_league_size)); if (static_cast<int>(space.m_maxShmemPerBlock) < m_shmem_size - m_shmem_begin) { diff --git a/core/src/SYCL/Kokkos_SYCL_ParallelReduce_MDRange.hpp b/core/src/SYCL/Kokkos_SYCL_ParallelReduce_MDRange.hpp index fa17f5619..155f4b008 100644 --- a/core/src/SYCL/Kokkos_SYCL_ParallelReduce_MDRange.hpp +++ b/core/src/SYCL/Kokkos_SYCL_ParallelReduce_MDRange.hpp @@ -114,8 +114,9 @@ class Kokkos::Impl::ParallelReduce<CombinedFunctorReducerType, #else (void)memcpy_event; #endif - results_ptr = static_cast<Kokkos::Impl::SYCLTypes::device_ptr<value_type>>( - instance.scratch_space(sizeof(value_type) * value_count)); + results_ptr = + static_cast<Kokkos::Impl::SYCLTypes::device_ptr<value_type>>( + instance.scratch_space(sizeof(value_type) * value_count)); auto device_accessible_result_ptr = m_result_ptr_device_accessible ? static_cast<sycl::global_ptr<value_type>>(m_result_ptr) @@ -155,8 +156,10 @@ class Kokkos::Impl::ParallelReduce<CombinedFunctorReducerType, n_wgroups = (n_tiles + values_per_thread - 1) / values_per_thread; } - results_ptr = static_cast<Kokkos::Impl::SYCLTypes::device_ptr<value_type>>( - instance.scratch_space(sizeof(value_type) * value_count * n_wgroups)); + results_ptr = + static_cast<Kokkos::Impl::SYCLTypes::device_ptr<value_type>>( + instance.scratch_space(sizeof(value_type) * value_count * + n_wgroups)); auto device_accessible_result_ptr = m_result_ptr_device_accessible ? static_cast<sycl::global_ptr<value_type>>(m_result_ptr) diff --git a/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp b/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp index 1221c2cd3..c00f9bb62 100644 --- a/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp +++ b/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp @@ -88,8 +88,9 @@ class Kokkos::Impl::ParallelReduce<CombinedFunctorReducerType, // working with the global scratch memory but don't copy back to // m_result_ptr yet. if (size <= 1) { - results_ptr = static_cast<Kokkos::Impl::SYCLTypes::device_ptr<value_type>>( - instance.scratch_space(sizeof(value_type) * value_count)); + results_ptr = + static_cast<Kokkos::Impl::SYCLTypes::device_ptr<value_type>>( + instance.scratch_space(sizeof(value_type) * value_count)); auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) { const auto begin = policy.begin(); @@ -303,9 +304,10 @@ class Kokkos::Impl::ParallelReduce<CombinedFunctorReducerType, wgroup_size; } - results_ptr = static_cast<Kokkos::Impl::SYCLTypes::device_ptr<value_type>>( - instance.scratch_space(sizeof(value_type) * value_count * - n_wgroups)); + results_ptr = + static_cast<Kokkos::Impl::SYCLTypes::device_ptr<value_type>>( + instance.scratch_space(sizeof(value_type) * value_count * + n_wgroups)); sycl::local_accessor<value_type> local_mem( sycl::range<1>(wgroup_size) * value_count, cgh); diff --git a/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp b/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp index e73a67c68..b9be11488 100644 --- a/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp +++ b/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp @@ -94,9 +94,10 @@ class Kokkos::Impl::ParallelReduce<CombinedFunctorReducerType, // working with the global scratch memory but don't copy back to // m_result_ptr yet. if (size <= 1) { - results_ptr = static_cast<Kokkos::Impl::SYCLTypes::device_ptr<value_type>>( - instance.scratch_space(sizeof(value_type) * - std::max(value_count, 1u))); + results_ptr = + static_cast<Kokkos::Impl::SYCLTypes::device_ptr<value_type>>( + instance.scratch_space(sizeof(value_type) * + std::max(value_count, 1u))); auto device_accessible_result_ptr = m_result_ptr_device_accessible ? static_cast<sycl::global_ptr<value_type>>(m_result_ptr) @@ -333,9 +334,10 @@ class Kokkos::Impl::ParallelReduce<CombinedFunctorReducerType, const auto init_size = std::max<std::size_t>((size + wgroup_size - 1) / wgroup_size, 1); - results_ptr = static_cast<Kokkos::Impl::SYCLTypes::device_ptr<value_type>>( - instance.scratch_space(sizeof(value_type) * - std::max(value_count, 1u) * init_size)); + results_ptr = + static_cast<Kokkos::Impl::SYCLTypes::device_ptr<value_type>>( + instance.scratch_space(sizeof(value_type) * + std::max(value_count, 1u) * init_size)); size_t max_work_groups = 2 * @@ -428,12 +430,13 @@ class Kokkos::Impl::ParallelReduce<CombinedFunctorReducerType, // Functor's reduce memory, team scan memory, and team shared memory depend // upon team size. - auto& space = *m_policy.space().impl_internal_space_instance(); - m_scratch_pool_id = space.acquire_team_scratch_space(); - m_global_scratch_ptr = static_cast<Kokkos::Impl::SYCLTypes::device_ptr<char>>( - space.resize_team_scratch_space( - m_scratch_pool_id, - static_cast<ptrdiff_t>(m_scratch_size[1]) * m_league_size)); + auto& space = *m_policy.space().impl_internal_space_instance(); + m_scratch_pool_id = space.acquire_team_scratch_space(); + m_global_scratch_ptr = + static_cast<Kokkos::Impl::SYCLTypes::device_ptr<char>>( + space.resize_team_scratch_space( + m_scratch_pool_id, + static_cast<ptrdiff_t>(m_scratch_size[1]) * m_league_size)); if (static_cast<int>(space.m_maxShmemPerBlock) < m_shmem_size - m_shmem_begin) { diff --git a/core/src/SYCL/Kokkos_SYCL_ParallelScan_Range.hpp b/core/src/SYCL/Kokkos_SYCL_ParallelScan_Range.hpp index 7c4dfe8e9..a3efe56b9 100644 --- a/core/src/SYCL/Kokkos_SYCL_ParallelScan_Range.hpp +++ b/core/src/SYCL/Kokkos_SYCL_ParallelScan_Range.hpp @@ -292,8 +292,9 @@ class ParallelScanSYCLBase { global_mem = static_cast<Kokkos::Impl::SYCLTypes::device_ptr<value_type>>( instance.scratch_space(n_wgroups * (wgroup_size + 1) * sizeof(value_type))); - m_scratch_host = static_cast<Kokkos::Impl::SYCLTypes::host_ptr<value_type>>( - instance.scratch_host(sizeof(value_type))); + m_scratch_host = + static_cast<Kokkos::Impl::SYCLTypes::host_ptr<value_type>>( + instance.scratch_host(sizeof(value_type))); group_results = global_mem + n_wgroups * wgroup_size; diff --git a/core/src/SYCL/Kokkos_SYCL_WorkgroupReduction.hpp b/core/src/SYCL/Kokkos_SYCL_WorkgroupReduction.hpp index 19c356a08..f25dec767 100644 --- a/core/src/SYCL/Kokkos_SYCL_WorkgroupReduction.hpp +++ b/core/src/SYCL/Kokkos_SYCL_WorkgroupReduction.hpp @@ -100,7 +100,8 @@ std::enable_if_t<!use_shuffle_based_algorithm<ReducerType>> workgroup_reduction( template <typename ValueType, typename ReducerType, int dim> std::enable_if_t<use_shuffle_based_algorithm<ReducerType>> workgroup_reduction( sycl::nd_item<dim>& item, sycl::local_accessor<ValueType> local_mem, - ValueType local_value, Kokkos::Impl::SYCLTypes::device_ptr<ValueType> results_ptr, + ValueType local_value, + Kokkos::Impl::SYCLTypes::device_ptr<ValueType> results_ptr, sycl::global_ptr<ValueType> device_accessible_result_ptr, const ReducerType& final_reducer, bool final, unsigned int max_size) { const auto local_id = item.get_local_linear_id(); diff --git a/core/src/setup/Kokkos_Setup_SYCL.hpp b/core/src/setup/Kokkos_Setup_SYCL.hpp index aa06527b0..7fb10bb39 100644 --- a/core/src/setup/Kokkos_Setup_SYCL.hpp +++ b/core/src/setup/Kokkos_Setup_SYCL.hpp @@ -49,7 +49,7 @@ namespace Kokkos::Impl::SYCLTypes { #ifndef SYCL_EXT_INTEL_USM_ADDRESS_SPACES #error SYCL_EXT_INTEL_USM_ADDRESS_SPACES undefined! -#elif SYCL_EXT_INTEL_USM_ADDRESS_SPACES >=2 +#elif SYCL_EXT_INTEL_USM_ADDRESS_SPACES >= 2 template <typename T> using device_ptr = sycl::ext::intel::device_ptr<T>; template <typename T> @@ -60,6 +60,6 @@ using device_ptr = sycl::device_ptr<T>; template <typename T> using host_ptr = sycl::host_ptr<T>; #endif -} +} // namespace Kokkos::Impl::SYCLTypes #endif [Pipeline] } $ docker stop --time=1 ad4d50a256f6b42b2cf57e26aac5a0bffbcb5b0a4cb4bc09167ff7af647be233 $ docker rm -f --volumes ad4d50a256f6b42b2cf57e26aac5a0bffbcb5b0a4cb4bc09167ff7af647be233 [Pipeline] // withDockerContainer [Pipeline] } [Pipeline] // withEnv [Pipeline] } [Pipeline] // node [Pipeline] } [Pipeline] // stage [Pipeline] stage [Pipeline] { (Build) Stage "Build" skipped due to earlier failure(s) [Pipeline] getContext [Pipeline] parallel [Pipeline] { (Branch: OPENACC-NVHPC-CUDA-12.2) [Pipeline] { (Branch: CUDA-12.2-NVHPC) [Pipeline] { (Branch: SYCL-OneAPI) [Pipeline] { (Branch: HIP-ROCm-5.2) [Pipeline] { (Branch: HIP-ROCm-5.6-C++20) [Pipeline] { (Branch: OPENMPTARGET-Clang) [Pipeline] { (Branch: CUDA-11.0.3-Clang-Tidy) [Pipeline] { (Branch: CUDA-11.7-NVCC) [Pipeline] { (Branch: CUDA-11.0-NVCC-RDC) [Pipeline] { (Branch: CUDA-11.6-NVCC-DEBUG) [Pipeline] { (Branch: GCC-8.4.0) [Pipeline] stage [Pipeline] { (OPENACC-NVHPC-CUDA-12.2) [Pipeline] stage [Pipeline] { (CUDA-12.2-NVHPC) [Pipeline] stage [Pipeline] { (SYCL-OneAPI) [Pipeline] stage [Pipeline] { (HIP-ROCm-5.2) [Pipeline] stage [Pipeline] { (HIP-ROCm-5.6-C++20) [Pipeline] stage [Pipeline] { (OPENMPTARGET-Clang) [Pipeline] stage [Pipeline] { (CUDA-11.0.3-Clang-Tidy) [Pipeline] stage [Pipeline] { (CUDA-11.7-NVCC) [Pipeline] stage [Pipeline] { (CUDA-11.0-NVCC-RDC) [Pipeline] stage [Pipeline] { (CUDA-11.6-NVCC-DEBUG) [Pipeline] stage [Pipeline] { (GCC-8.4.0) Stage "OPENACC-NVHPC-CUDA-12.2" skipped due to earlier failure(s) [Pipeline] getContext [Pipeline] } Stage "CUDA-12.2-NVHPC" skipped due to earlier failure(s) [Pipeline] getContext [Pipeline] } Stage "SYCL-OneAPI" skipped due to earlier failure(s) [Pipeline] getContext [Pipeline] } Stage "HIP-ROCm-5.2" skipped due to earlier failure(s) [Pipeline] getContext [Pipeline] } Stage "HIP-ROCm-5.6-C++20" skipped due to earlier failure(s) [Pipeline] getContext [Pipeline] } Stage "OPENMPTARGET-Clang" skipped due to earlier failure(s) [Pipeline] getContext [Pipeline] } Stage "CUDA-11.0.3-Clang-Tidy" skipped due to earlier failure(s) [Pipeline] getContext [Pipeline] } Stage "CUDA-11.7-NVCC" skipped due to earlier failure(s) [Pipeline] getContext [Pipeline] } Stage "CUDA-11.0-NVCC-RDC" skipped due to earlier failure(s) [Pipeline] getContext [Pipeline] } Stage "CUDA-11.6-NVCC-DEBUG" skipped due to earlier failure(s) [Pipeline] getContext [Pipeline] } Stage "GCC-8.4.0" skipped due to earlier failure(s) [Pipeline] getContext [Pipeline] } [Pipeline] // stage [Pipeline] // stage [Pipeline] // stage [Pipeline] // stage [Pipeline] // stage [Pipeline] // stage [Pipeline] // stage [Pipeline] // stage [Pipeline] // stage [Pipeline] // stage [Pipeline] // stage [Pipeline] } Failed in branch OPENACC-NVHPC-CUDA-12.2 [Pipeline] } Failed in branch CUDA-12.2-NVHPC [Pipeline] } Failed in branch SYCL-OneAPI [Pipeline] } Failed in branch HIP-ROCm-5.2 [Pipeline] } Failed in branch HIP-ROCm-5.6-C++20 [Pipeline] } Failed in branch OPENMPTARGET-Clang [Pipeline] } Failed in branch CUDA-11.0.3-Clang-Tidy [Pipeline] } Failed in branch CUDA-11.7-NVCC [Pipeline] } Failed in branch CUDA-11.0-NVCC-RDC [Pipeline] } Failed in branch CUDA-11.6-NVCC-DEBUG [Pipeline] } Failed in branch GCC-8.4.0 [Pipeline] // parallel [Pipeline] } [Pipeline] // stage [Pipeline] } [Pipeline] // timeout [Pipeline] } [Pipeline] // withEnv [Pipeline] End of Pipeline ERROR: script returned exit code 1 GitHub has been notified of this commit’s build result Finished: FAILURE