Skip to content
Snippets Groups Projects
Commit ed52e858 authored by Eric Müller's avatar Eric Müller :mountain_bicyclist:
Browse files

Fix py-torch package (locally)

Dependency on py-pybind11 now "open" interval… (i.e. later than 2.6.2
allowed for newer version of pytorch).
parent 39ebf960
No related branches found
No related tags found
1 merge request!82Cleanup and fix py-torch concretization
diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu b/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu
index 1cee04c200..f46003d9a9 100644
--- a/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu
+++ b/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu
@@ -10,48 +10,6 @@
namespace at { namespace native { namespace sparse { namespace cuda {
-std::string cusparseGetErrorString(cusparseStatus_t status) {
- switch(status)
- {
- case CUSPARSE_STATUS_SUCCESS:
- return "success";
-
- case CUSPARSE_STATUS_NOT_INITIALIZED:
- return "library not initialized";
-
- case CUSPARSE_STATUS_ALLOC_FAILED:
- return "resource allocation failed";
-
- case CUSPARSE_STATUS_INVALID_VALUE:
- return "an invalid numeric value was used as an argument";
-
- case CUSPARSE_STATUS_ARCH_MISMATCH:
- return "an absent device architectural feature is required";
-
- case CUSPARSE_STATUS_MAPPING_ERROR:
- return "an access to GPU memory space failed";
-
- case CUSPARSE_STATUS_EXECUTION_FAILED:
- return "the GPU program failed to execute";
-
- case CUSPARSE_STATUS_INTERNAL_ERROR:
- return "an internal operation failed";
-
- case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
- return "the matrix type is not supported by this function";
-
- case CUSPARSE_STATUS_ZERO_PIVOT:
- return "an entry of the matrix is either structural zero or numerical zero (singular block)";
-
- default:
- {
- std::ostringstream oss;
- oss << "unknown error " << static_cast<int64_t>(status);
- return oss.str();
- }
- }
-}
-
inline void CUSPARSE_CHECK(cusparseStatus_t status)
{
if (status != CUSPARSE_STATUS_SUCCESS) {
--- pytorch/cmake/Modules/FindOpenMP.cmake.org 2020-05-26 17:43:53.000000000 +0900
+++ pytorch/cmake/Modules/FindOpenMP.cmake 2020-05-26 17:46:37.000000000 +0900
@@ -84,7 +84,7 @@
unset(OpenMP_FLAG_CANDIDATES)
set(OMP_FLAG_GNU "-fopenmp")
- set(OMP_FLAG_Clang "-fopenmp=libomp" "-fopenmp=libiomp5" "-fopenmp")
+ set(OMP_FLAG_Clang "-fopenmp" "-fopenmp=libomp" "-fopenmp=libiomp5")
# AppleClang may need a header file, search for omp.h with hints to brew
# default include dir
@@ -245,7 +245,7 @@
set(OpenMP_libomp_LIBRARY "${MKL_OPENMP_LIBRARY}" CACHE STRING "libomp location for OpenMP")
else()
find_library(OpenMP_libomp_LIBRARY
- NAMES omp gomp iomp5
+ NAMES fjomp omp gomp iomp5
HINTS ${CMAKE_${LANG}_IMPLICIT_LINK_DIRECTORIES}
DOC "libomp location for OpenMP"
)
# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os
import sys
from spack import *
class PyTorch(PythonPackage, CudaPackage):
"""Tensors and Dynamic neural networks in Python
with strong GPU acceleration."""
homepage = "https://pytorch.org/"
git = "https://github.com/pytorch/pytorch.git"
maintainers = ['adamjstewart']
# Exact set of modules is version- and variant-specific, just attempt to import the
# core libraries to ensure that the package was successfully installed.
import_modules = ['torch', 'torch.autograd', 'torch.nn', 'torch.utils']
version('master', branch='master', submodules=True)
version('1.10.2', tag='v1.10.2', submodules=True)
version('1.10.1', tag='v1.10.1', submodules=True)
version('1.10.0', tag='v1.10.0', submodules=True)
version('1.9.1', tag='v1.9.1', submodules=True)
version('1.9.0', tag='v1.9.0', submodules=True)
version('1.8.2', tag='v1.8.2', submodules=True)
version('1.8.1', tag='v1.8.1', submodules=True)
version('1.8.0', tag='v1.8.0', submodules=True)
version('1.7.1', tag='v1.7.1', submodules=True)
version('1.7.0', tag='v1.7.0', submodules=True)
version('1.6.0', tag='v1.6.0', submodules=True)
version('1.5.1', tag='v1.5.1', submodules=True)
version('1.5.0', tag='v1.5.0', submodules=True)
version('1.4.1', tag='v1.4.1', submodules=True)
version('1.3.1', tag='v1.3.1', submodules=True)
version('1.3.0', tag='v1.3.0', submodules=True)
version('1.2.0', tag='v1.2.0', submodules=True)
version('1.1.0', tag='v1.1.0', submodules=True)
version('1.0.1', tag='v1.0.1', submodules=True, deprecated=True)
version('1.0.0', tag='v1.0.0', submodules=True, deprecated=True)
is_darwin = sys.platform == 'darwin'
# All options are defined in CMakeLists.txt.
# Some are listed in setup.py, but not all.
variant('caffe2', default=True, description='Build Caffe2', when='@1.7:')
variant('test', default=False, description='Build C++ test binaries')
variant('cuda', default=not is_darwin, description='Use CUDA')
variant('rocm', default=False, description='Use ROCm')
variant('cudnn', default=not is_darwin, description='Use cuDNN', when='+cuda')
variant('fbgemm', default=True, description='Use FBGEMM (quantized 8-bit server operators)')
variant('kineto', default=True, description='Use Kineto profiling library', when='@1.8:')
variant('magma', default=not is_darwin, description='Use MAGMA', when='+cuda')
variant('metal', default=is_darwin, description='Use Metal for Caffe2 iOS build')
variant('nccl', default=True, description='Use NCCL', when='+cuda platform=linux')
variant('nccl', default=True, description='Use NCCL', when='+cuda platform=cray')
variant('nccl', default=True, description='Use NCCL', when='+rocm platform=linux')
variant('nccl', default=True, description='Use NCCL', when='+rocm platform=cray')
variant('nnpack', default=True, description='Use NNPACK')
variant('numa', default=True, description='Use NUMA', when='platform=linux')
variant('numa', default=True, description='Use NUMA', when='platform=cray')
variant('numpy', default=True, description='Use NumPy')
variant('openmp', default=True, description='Use OpenMP for parallel code')
variant('qnnpack', default=True, description='Use QNNPACK (quantized 8-bit operators)')
variant('valgrind', default=True, description='Use Valgrind', when='@1.8: platform=linux')
variant('valgrind', default=True, description='Use Valgrind', when='@1.8: platform=cray')
variant('xnnpack', default=True, description='Use XNNPACK', when='@1.5:')
variant('mkldnn', default=True, description='Use MKLDNN')
variant('distributed', default=not is_darwin, description='Use distributed')
variant('mpi', default=not is_darwin, description='Use MPI for Caffe2', when='+distributed')
variant('gloo', default=not is_darwin, description='Use Gloo', when='+distributed')
variant('tensorpipe', default=not is_darwin, description='Use TensorPipe', when='@1.6: +distributed')
variant('onnx_ml', default=True, description='Enable traditional ONNX ML API', when='@1.5:')
variant('breakpad', default=True, description='Enable breakpad crash dump library', when='@1.9:')
conflicts('+cuda+rocm')
conflicts('+breakpad', when='target=ppc64:')
conflicts('+breakpad', when='target=ppc64le:')
conflicts('cuda_arch=none', when='+cuda',
msg='Must specify CUDA compute capabilities of your GPU, see '
'https://developer.nvidia.com/cuda-gpus')
# Required dependencies
depends_on('cmake@3.5:', type='build')
# Use Ninja generator to speed up build times, automatically used if found
depends_on('ninja@1.5:', when='@1.1:', type='build')
# See python_min_version in setup.py
depends_on('python@3.6.2:', when='@1.7.1:', type=('build', 'link', 'run'))
depends_on('python@3.6.1:', when='@1.6:1.7.0', type=('build', 'link', 'run'))
depends_on('python@3.5:', when='@1.5', type=('build', 'link', 'run'))
depends_on('python@2.7:2,3.5:', when='@1.4', type=('build', 'link', 'run'))
depends_on('python@2.7:2,3.5:3.7', when='@:1.3', type=('build', 'link', 'run'))
depends_on('py-setuptools', type=('build', 'run'))
depends_on('py-future', when='@1.5:', type=('build', 'run'))
depends_on('py-future', when='@1.1: ^python@:2', type=('build', 'run'))
depends_on('py-pyyaml', type=('build', 'run'))
depends_on('py-typing', when='^python@:3.4', type=('build', 'run'))
depends_on('py-pybind11@2.6.2:', when='@1.8:', type=('build', 'link', 'run'))
depends_on('py-pybind11@2.3.0', when='@1.1:1.7', type=('build', 'link', 'run'))
depends_on('py-pybind11@2.2.4', when='@:1.0', type=('build', 'link', 'run'))
depends_on('py-dataclasses', when='@1.7: ^python@3.6', type=('build', 'run'))
depends_on('py-tqdm', type='run')
# https://github.com/onnx/onnx#prerequisites
depends_on('py-numpy@1.16.6:', type=('build', 'run'))
depends_on('py-protobuf@3.12.2:', when='@1.10:', type=('build', 'run'))
depends_on('py-protobuf@:3.14', when='@:1.9', type=('build', 'run'))
depends_on('protobuf@3.12.2:', when='@1.10:')
depends_on('protobuf@:3.14', when='@:1.9')
depends_on('py-typing-extensions@3.6.2.1:', when='@1.7:', type=('build', 'run'))
depends_on('blas')
depends_on('lapack')
depends_on('eigen')
# https://github.com/pytorch/pytorch/issues/60329
# depends_on('cpuinfo@2020-12-17', when='@1.8:')
# depends_on('cpuinfo@2020-06-11', when='@1.6:1.7')
# https://github.com/shibatch/sleef/issues/427
# depends_on('sleef@3.5.1_2020-12-22', when='@1.8:')
# https://github.com/pytorch/pytorch/issues/60334
# depends_on('sleef@3.4.0_2019-07-30', when='@1.6:1.7')
# https://github.com/Maratyszcza/FP16/issues/18
# depends_on('fp16@2020-05-14', when='@1.6:')
depends_on('pthreadpool@2021-04-13', when='@1.9:')
depends_on('pthreadpool@2020-10-05', when='@1.8')
depends_on('pthreadpool@2020-06-15', when='@1.6:1.7')
depends_on('psimd@2020-05-17', when='@1.6:')
depends_on('fxdiv@2020-04-17', when='@1.6:')
depends_on('benchmark', when='@1.6:+test')
# Optional dependencies
# https://discuss.pytorch.org/t/compiling-1-10-1-from-source-with-gcc-11-and-cuda-11-5/140971
depends_on('cuda@9.2:', when='@1.11:+cuda', type=('build', 'link', 'run'))
depends_on('cuda@9.2:11.4', when='@1.6:+cuda', type=('build', 'link', 'run'))
depends_on('cuda@9:11.4', when='@1.1:+cuda', type=('build', 'link', 'run'))
depends_on('cuda@7.5:11.4', when='+cuda', type=('build', 'link', 'run'))
depends_on('cudnn@6:7', when='@:1.0+cudnn')
depends_on('cudnn@7.0:7', when='@1.1:1.5+cudnn')
depends_on('cudnn@7:', when='@1.6:+cudnn')
depends_on('magma', when='+magma')
depends_on('nccl', when='+nccl')
depends_on('numactl', when='+numa')
depends_on('llvm-openmp', when='%apple-clang +openmp')
depends_on('valgrind', when='+valgrind')
# https://github.com/pytorch/pytorch/issues/60332
# depends_on('xnnpack@2021-02-22', when='@1.8:+xnnpack')
# depends_on('xnnpack@2020-03-23', when='@1.6:1.7+xnnpack')
depends_on('mpi', when='+mpi')
# https://github.com/pytorch/pytorch/issues/60270
# depends_on('gloo@2021-05-04', when='@1.9:+gloo')
# depends_on('gloo@2020-09-18', when='@1.7:1.8+gloo')
# depends_on('gloo@2020-03-17', when='@1.6+gloo')
# https://github.com/pytorch/pytorch/issues/60331
# depends_on('onnx@1.8.0_2020-11-03', when='@1.8:+onnx_ml')
# depends_on('onnx@1.7.0_2020-05-31', when='@1.6:1.7+onnx_ml')
depends_on('mkl', when='+mkldnn')
# Test dependencies
depends_on('py-hypothesis', type='test')
depends_on('py-six', type='test')
depends_on('py-psutil', type='test')
# Fix BLAS being overridden by MKL
# https://github.com/pytorch/pytorch/issues/60328
patch('https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/59220.patch',
sha256='e37afffe45cf7594c22050109942370e49983ad772d12ebccf508377dc9dcfc9',
when='@1.2:')
# Fixes build on older systems with glibc <2.12
patch('https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/55063.patch',
sha256='e17eaa42f5d7c18bf0d7c37d7b0910127a01ad53fdce3e226a92893356a70395',
when='@1.1:1.8.1')
# Fixes CMake configuration error when XNNPACK is disabled
# https://github.com/pytorch/pytorch/pull/35607
# https://github.com/pytorch/pytorch/pull/37865
patch('xnnpack.patch', when='@1.5')
# Fixes build error when ROCm is enabled for pytorch-1.5 release
patch('rocm.patch', when='@1.5+rocm')
# Fixes fatal error: sleef.h: No such file or directory
# https://github.com/pytorch/pytorch/pull/35359
# https://github.com/pytorch/pytorch/issues/26555
# patch('sleef.patch', when='@:1.5')
# Fixes compilation with Clang 9.0.0 and Apple Clang 11.0.3
# https://github.com/pytorch/pytorch/pull/37086
patch('https://github.com/pytorch/pytorch/commit/e921cd222a8fbeabf5a3e74e83e0d8dfb01aa8b5.patch',
sha256='17561b16cd2db22f10c0fe1fdcb428aecb0ac3964ba022a41343a6bb8cba7049',
when='@1.1:1.5')
# Removes duplicate definition of getCusparseErrorString
# https://github.com/pytorch/pytorch/issues/32083
patch('cusparseGetErrorString.patch', when='@:1.0^cuda@10.1.243:')
# Fixes 'FindOpenMP.cmake'
# to detect openmp settings used by Fujitsu compiler.
patch('detect_omp_of_fujitsu_compiler.patch', when='%fj')
# Fix compilation of +distributed~tensorpipe
# https://github.com/pytorch/pytorch/issues/68002
patch('https://github.com/pytorch/pytorch/commit/c075f0f633fa0136e68f0a455b5b74d7b500865c.patch',
sha256='e69e41b5c171bfb00d1b5d4ee55dd5e4c8975483230274af4ab461acd37e40b8', when='@1.10.0+distributed~tensorpipe')
@property
def libs(self):
# TODO: why doesn't `python_platlib` work here?
root = join_path(
self.prefix, self.spec['python'].package.platlib, 'torch', 'lib'
)
return find_libraries('libtorch', root)
@property
def headers(self):
# TODO: why doesn't `python_platlib` work here?
root = join_path(
self.prefix, self.spec['python'].package.platlib, 'torch', 'include'
)
headers = find_all_headers(root)
headers.directories = [root]
return headers
@when('@1.5.0:')
def patch(self):
# https://github.com/pytorch/pytorch/issues/52208
filter_file('torch_global_deps PROPERTIES LINKER_LANGUAGE C',
'torch_global_deps PROPERTIES LINKER_LANGUAGE CXX',
'caffe2/CMakeLists.txt')
def setup_build_environment(self, env):
"""Set environment variables used to control the build.
PyTorch's ``setup.py`` is a thin wrapper around ``cmake``.
In ``tools/setup_helpers/cmake.py``, you can see that all
environment variables that start with ``BUILD_``, ``USE_``,
or ``CMAKE_``, plus a few more explicitly specified variable
names, are passed directly to the ``cmake`` call. Therefore,
most flags defined in ``CMakeLists.txt`` can be specified as
environment variables.
"""
def enable_or_disable(variant, keyword='USE', var=None, newer=False):
"""Set environment variable to enable or disable support for a
particular variant.
Parameters:
variant (str): the variant to check
keyword (str): the prefix to use for enabling/disabling
var (str): CMake variable to set. Defaults to variant.upper()
newer (bool): newer variants that never used NO_*
"""
if var is None:
var = variant.upper()
# Version 1.1.0 switched from NO_* to USE_* or BUILD_*
# But some newer variants have always used USE_* or BUILD_*
if self.spec.satisfies('@1.1:') or newer:
if '+' + variant in self.spec:
env.set(keyword + '_' + var, 'ON')
elif '~' + variant in self.spec:
env.set(keyword + '_' + var, 'OFF')
else:
if '+' + variant in self.spec:
env.unset('NO_' + var)
elif '~' + variant in self.spec:
env.set('NO_' + var, 'ON')
# Build in parallel to speed up build times
env.set('MAX_JOBS', make_jobs)
# Spack logs have trouble handling colored output
env.set('COLORIZE_OUTPUT', 'OFF')
enable_or_disable('test', keyword='BUILD')
enable_or_disable('caffe2', keyword='BUILD')
enable_or_disable('cuda')
if '+cuda' in self.spec:
# cmake/public/cuda.cmake
# cmake/Modules_CUDA_fix/upstream/FindCUDA.cmake
env.unset('CUDA_ROOT')
torch_cuda_arch = ';'.join('{0:.1f}'.format(float(i) / 10.0) for i
in
self.spec.variants['cuda_arch'].value)
env.set('TORCH_CUDA_ARCH_LIST', torch_cuda_arch)
enable_or_disable('rocm')
enable_or_disable('cudnn')
if '+cudnn' in self.spec:
# cmake/Modules_CUDA_fix/FindCUDNN.cmake
env.set('CUDNN_INCLUDE_DIR', self.spec['cudnn'].prefix.include)
env.set('CUDNN_LIBRARY', self.spec['cudnn'].libs[0])
enable_or_disable('fbgemm')
enable_or_disable('kineto')
enable_or_disable('magma')
enable_or_disable('metal')
enable_or_disable('breakpad')
enable_or_disable('nccl')
if '+nccl' in self.spec:
env.set('NCCL_LIB_DIR', self.spec['nccl'].libs.directories[0])
env.set('NCCL_INCLUDE_DIR', self.spec['nccl'].prefix.include)
# cmake/External/nnpack.cmake
enable_or_disable('nnpack')
enable_or_disable('numa')
if '+numa' in self.spec:
# cmake/Modules/FindNuma.cmake
env.set('NUMA_ROOT_DIR', self.spec['numactl'].prefix)
# cmake/Modules/FindNumPy.cmake
enable_or_disable('numpy')
# cmake/Modules/FindOpenMP.cmake
enable_or_disable('openmp', newer=True)
enable_or_disable('qnnpack')
enable_or_disable('qnnpack', var='PYTORCH_QNNPACK')
enable_or_disable('valgrind')
enable_or_disable('xnnpack')
enable_or_disable('mkldnn')
enable_or_disable('distributed')
enable_or_disable('mpi')
# cmake/Modules/FindGloo.cmake
enable_or_disable('gloo', newer=True)
enable_or_disable('tensorpipe')
if '+onnx_ml' in self.spec:
env.set('ONNX_ML', 'ON')
elif '~onnx_ml' in self.spec:
env.set('ONNX_ML', 'OFF')
if not self.spec.satisfies('@master'):
env.set('PYTORCH_BUILD_VERSION', self.version)
env.set('PYTORCH_BUILD_NUMBER', 0)
# BLAS to be used by Caffe2
# Options defined in cmake/Dependencies.cmake and cmake/Modules/FindBLAS.cmake
if self.spec['blas'].name == 'atlas':
env.set('BLAS', 'ATLAS')
env.set('WITH_BLAS', 'atlas')
elif self.spec['blas'].name in ['blis', 'amdblis']:
env.set('BLAS', 'BLIS')
env.set('WITH_BLAS', 'blis')
elif self.spec['blas'].name == 'eigen':
env.set('BLAS', 'Eigen')
elif self.spec['lapack'].name in ['libflame', 'amdlibflame']:
env.set('BLAS', 'FLAME')
env.set('WITH_BLAS', 'FLAME')
elif self.spec['blas'].name in [
'intel-mkl', 'intel-parallel-studio', 'intel-oneapi-mkl']:
env.set('BLAS', 'MKL')
env.set('WITH_BLAS', 'mkl')
elif self.spec['blas'].name == 'openblas':
env.set('BLAS', 'OpenBLAS')
env.set('WITH_BLAS', 'open')
elif self.spec['blas'].name == 'veclibfort':
env.set('BLAS', 'vecLib')
env.set('WITH_BLAS', 'veclib')
else:
env.set('BLAS', 'Generic')
env.set('WITH_BLAS', 'generic')
# Don't use vendored third-party libraries when possible
env.set('BUILD_CUSTOM_PROTOBUF', 'OFF')
env.set('USE_SYSTEM_NCCL', 'ON')
env.set('USE_SYSTEM_EIGEN_INSTALL', 'ON')
env.set('pybind11_DIR', self.spec['py-pybind11'].prefix)
env.set('pybind11_INCLUDE_DIR',
self.spec['py-pybind11'].prefix.include)
if self.spec.satisfies('@1.10:'):
env.set('USE_SYSTEM_PYBIND11', 'ON')
# https://github.com/pytorch/pytorch/issues/60334
# if self.spec.satisfies('@1.8:'):
# env.set('USE_SYSTEM_SLEEF', 'ON')
if self.spec.satisfies('@1.6:'):
# env.set('USE_SYSTEM_LIBS', 'ON')
# https://github.com/pytorch/pytorch/issues/60329
# env.set('USE_SYSTEM_CPUINFO', 'ON')
# https://github.com/pytorch/pytorch/issues/60270
# env.set('USE_SYSTEM_GLOO', 'ON')
# https://github.com/Maratyszcza/FP16/issues/18
# env.set('USE_SYSTEM_FP16', 'ON')
env.set('USE_SYSTEM_PTHREADPOOL', 'ON')
env.set('USE_SYSTEM_PSIMD', 'ON')
env.set('USE_SYSTEM_FXDIV', 'ON')
env.set('USE_SYSTEM_BENCHMARK', 'ON')
# https://github.com/pytorch/pytorch/issues/60331
# env.set('USE_SYSTEM_ONNX', 'ON')
# https://github.com/pytorch/pytorch/issues/60332
# env.set('USE_SYSTEM_XNNPACK', 'ON')
@run_before('install')
def build_amd(self):
if '+rocm' in self.spec:
python(os.path.join('tools', 'amd_build', 'build_amd.py'))
@run_after('install')
@on_package_attributes(run_tests=True)
def install_test(self):
with working_dir('test'):
python('run_test.py')
diff --git a/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h b/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
index 9cd678dfb4cc7..4630465115c7c 100644
--- a/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
+++ b/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
@@ -67,6 +67,14 @@ namespace at { namespace cuda {
//
// HIP doesn't have
// cuGetErrorString (maps to non-functional hipGetErrorString___)
+//
+// HIP from ROCm 3.5 on renamed hipOccupancyMaxActiveBlocksPerMultiprocessor
+// to hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.
+#if HIP_VERSION < 305
+#define HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR hipOccupancyMaxActiveBlocksPerMultiprocessor
+#else
+#define HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR cuOccupancyMaxActiveBlocksPerMultiprocessor
+#endif
#define AT_FORALL_NVRTC(_) \
_(nvrtcVersion) \
@@ -76,7 +84,7 @@ namespace at { namespace cuda {
_(nvrtcGetPTX) \
_(cuModuleLoadData) \
_(cuModuleGetFunction) \
- _(cuOccupancyMaxActiveBlocksPerMultiprocessor) \
+ _(HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR)\
_(nvrtcGetErrorString) \
_(nvrtcGetProgramLogSize) \
_(nvrtcGetProgramLog) \
diff --git a/aten/src/ATen/native/cuda/SoftMax.cu b/aten/src/ATen/native/cuda/SoftMax.cu
index da1995123ecfc..f935eb4ef3d0e 100644
--- a/aten/src/ATen/native/cuda/SoftMax.cu
+++ b/aten/src/ATen/native/cuda/SoftMax.cu
@@ -127,8 +127,8 @@ void SpatialSoftMax_getLaunchSizes(
uint32_t block_threads = block.x * block.y;
smem_size = block.x == 1 ? 0 : block_threads * sizeof(accscalar_t);
int max_active_blocks;
-#ifdef __HIP_PLATFORM_HCC__
- // XXX HIP function signature is not compatible yet.
+#if defined(__HIP_PLATFORM_HCC__) && HIP_VERSION < 305
+ // HIP function signature is not compatible yet.
uint32_t max_blocks;
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks,
k, block_threads, smem_size);
diff --git a/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp b/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
index 5586e49919727..27315ee475277 100644
--- a/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
+++ b/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
@@ -140,10 +140,10 @@ FusedKernelCUDA::FusedKernelCUDA(
nvrtc().cuModuleGetFunction(&function_, module_, name_.c_str()));
// Computes max blocks
-#ifdef __HIP_PLATFORM_HCC__
- // XXX HIP function signature is not compatible yet
+#if defined(__HIP_PLATFORM_HCC__) && HIP_VERSION < 305
+ // HIP function signature is not compatible yet
uint32_t max_blocks;
- AT_CUDA_DRIVER_CHECK(nvrtc().cuOccupancyMaxActiveBlocksPerMultiprocessor(
+ AT_CUDA_DRIVER_CHECK(nvrtc().hipOccupancyMaxActiveBlocksPerMultiprocessor(
&max_blocks, function_, 128, 0));
maxBlocks_ = max_blocks;
#else
diff --git a/torch/utils/hipify/cuda_to_hip_mappings.py b/torch/utils/hipify/cuda_to_hip_mappings.py
index 7e21363cbe6af..26f269d92ae38 100644
--- a/torch/utils/hipify/cuda_to_hip_mappings.py
+++ b/torch/utils/hipify/cuda_to_hip_mappings.py
@@ -2890,7 +2890,7 @@
(
"cuOccupancyMaxActiveBlocksPerMultiprocessor",
(
- "hipOccupancyMaxActiveBlocksPerMultiprocessor",
+ "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor",
CONV_OCCUPANCY,
API_DRIVER,
),
@@ -2898,7 +2898,7 @@
(
"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
(
- "hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
+ "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
CONV_OCCUPANCY,
API_DRIVER,
HIP_UNSUPPORTED,
@@ -2906,12 +2906,12 @@
),
(
"cuOccupancyMaxPotentialBlockSize",
- ("hipOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER),
+ ("hipModuleOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER),
),
(
"cuOccupancyMaxPotentialBlockSizeWithFlags",
(
- "hipOccupancyMaxPotentialBlockSizeWithFlags",
+ "hipModuleOccupancyMaxPotentialBlockSizeWithFlags",
CONV_OCCUPANCY,
API_DRIVER,
HIP_UNSUPPORTED,
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 8025a7de3c..2e5cdbb5c9 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -1232,6 +1232,7 @@ if (BUILD_TEST)
add_executable(${test_name} "${test_src}")
target_link_libraries(${test_name} ${Caffe2_MAIN_LIBS} gtest_main)
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
+ target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
if (INSTALL_TEST)
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 8025a7de3c..0da37079d6 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -46,12 +46,19 @@ if (INTERN_BUILD_ATEN_OPS)
list(APPEND Caffe2_DEPENDENCY_INCLUDE ${ATen_THIRD_PARTY_INCLUDE})
endif()
+# {Q/X,etc} NPACK support is enabled by default, if none of these options
+# are selected, turn this flag ON to incidate the support is disabled
+set(NNPACK_AND_FAMILY_DISABLED OFF)
+if(NOT (USE_NNPACK OR USE_QNNPACK OR USE_PYTORCH_QNNPACK OR USE_XNNPACK))
+ set(NNPACK_AND_FAMILY_DISABLED ON)
+endif()
+
# ---[ Caffe2 build
# Note: the folders that are being commented out have not been properly
# addressed yet.
# For pthreadpool_new_if_impl. TODO: Remove when threadpools are unitied.
-if (NOT MSVC)
+if (NOT MSVC AND NOT NNPACK_AND_FAMILY_DISABLED)
IF(NOT TARGET fxdiv)
SET(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
SET(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
@@ -710,7 +717,7 @@ ELSEIF(USE_CUDA)
ENDIF()
-if (NOT MSVC)
+if (NOT MSVC AND NOT NNPACK_AND_FAMILY_DISABLED)
TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
endif()
diff --git a/caffe2/utils/CMakeLists.txt b/caffe2/utils/CMakeLists.txt
index 27aabb1315..3c7845c67d 100644
--- a/caffe2/utils/CMakeLists.txt
+++ b/caffe2/utils/CMakeLists.txt
@@ -36,7 +36,7 @@ list(APPEND Caffe2_CPU_SRCS
# ---[ threadpool/pthreadpool* is a local modification of the NNPACK
# pthreadpool with a very similar interface. Neither NNPACK, nor this
# thread pool supports Windows.
-if (NOT MSVC)
+if (NOT MSVC AND NOT NNPACK_AND_FAMILY_DISABLED)
add_definitions(-DUSE_INTERNAL_THREADPOOL_IMPL)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS}
utils/threadpool/pthreadpool.cc
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment