diff --git a/packages/py-torch/cusparseGetErrorString.patch b/packages/py-torch/cusparseGetErrorString.patch
new file mode 100644
index 0000000000000000000000000000000000000000..9cb136b3f88faee0631eaac14c2915664e30dc6c
--- /dev/null
+++ b/packages/py-torch/cusparseGetErrorString.patch
@@ -0,0 +1,53 @@
+diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu b/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu
+index 1cee04c200..f46003d9a9 100644
+--- a/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu
++++ b/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu
+@@ -10,48 +10,6 @@
+ namespace at { namespace native { namespace sparse { namespace cuda {
+ 
+ 
+-std::string cusparseGetErrorString(cusparseStatus_t status) {
+-  switch(status)
+-  {
+-    case CUSPARSE_STATUS_SUCCESS:
+-      return "success";
+-
+-    case CUSPARSE_STATUS_NOT_INITIALIZED:
+-      return "library not initialized";
+-
+-    case CUSPARSE_STATUS_ALLOC_FAILED:
+-      return "resource allocation failed";
+-
+-    case CUSPARSE_STATUS_INVALID_VALUE:
+-      return "an invalid numeric value was used as an argument";
+-
+-    case CUSPARSE_STATUS_ARCH_MISMATCH:
+-      return "an absent device architectural feature is required";
+-
+-    case CUSPARSE_STATUS_MAPPING_ERROR:
+-      return "an access to GPU memory space failed";
+-
+-    case CUSPARSE_STATUS_EXECUTION_FAILED:
+-      return "the GPU program failed to execute";
+-
+-    case CUSPARSE_STATUS_INTERNAL_ERROR:
+-      return "an internal operation failed";
+-
+-    case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
+-      return "the matrix type is not supported by this function";
+-
+-    case CUSPARSE_STATUS_ZERO_PIVOT:
+-      return "an entry of the matrix is either structural zero or numerical zero (singular block)";
+-
+-    default:
+-      {
+-        std::ostringstream oss;
+-        oss << "unknown error " << static_cast<int64_t>(status);
+-        return oss.str();
+-      }
+-  }
+-}
+-
+ inline void CUSPARSE_CHECK(cusparseStatus_t status)
+ {
+   if (status != CUSPARSE_STATUS_SUCCESS) {
diff --git a/packages/py-torch/detect_omp_of_fujitsu_compiler.patch b/packages/py-torch/detect_omp_of_fujitsu_compiler.patch
new file mode 100644
index 0000000000000000000000000000000000000000..519d66869d578ea4a59c4e7f626569baade6837a
--- /dev/null
+++ b/packages/py-torch/detect_omp_of_fujitsu_compiler.patch
@@ -0,0 +1,20 @@
+--- pytorch/cmake/Modules/FindOpenMP.cmake.org	2020-05-26 17:43:53.000000000 +0900
++++ pytorch/cmake/Modules/FindOpenMP.cmake	2020-05-26 17:46:37.000000000 +0900
+@@ -84,7 +84,7 @@
+     unset(OpenMP_FLAG_CANDIDATES)
+ 
+     set(OMP_FLAG_GNU "-fopenmp")
+-    set(OMP_FLAG_Clang "-fopenmp=libomp" "-fopenmp=libiomp5" "-fopenmp")
++    set(OMP_FLAG_Clang "-fopenmp" "-fopenmp=libomp" "-fopenmp=libiomp5")
+ 
+     # AppleClang may need a header file, search for omp.h with hints to brew
+     # default include dir
+@@ -245,7 +245,7 @@
+         set(OpenMP_libomp_LIBRARY "${MKL_OPENMP_LIBRARY}" CACHE STRING "libomp location for OpenMP")
+       else()
+         find_library(OpenMP_libomp_LIBRARY
+-          NAMES omp gomp iomp5
++          NAMES fjomp omp gomp iomp5
+           HINTS ${CMAKE_${LANG}_IMPLICIT_LINK_DIRECTORIES}
+           DOC "libomp location for OpenMP"
+         )
diff --git a/packages/py-torch/package.py b/packages/py-torch/package.py
new file mode 100644
index 0000000000000000000000000000000000000000..de0c36bea5240150e4539069c405233b7c685a9b
--- /dev/null
+++ b/packages/py-torch/package.py
@@ -0,0 +1,407 @@
+# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+import os
+import sys
+
+from spack import *
+
+
+class PyTorch(PythonPackage, CudaPackage):
+    """Tensors and Dynamic neural networks in Python
+    with strong GPU acceleration."""
+
+    homepage = "https://pytorch.org/"
+    git      = "https://github.com/pytorch/pytorch.git"
+
+    maintainers = ['adamjstewart']
+
+    # Exact set of modules is version- and variant-specific, just attempt to import the
+    # core libraries to ensure that the package was successfully installed.
+    import_modules = ['torch', 'torch.autograd', 'torch.nn', 'torch.utils']
+
+    version('master', branch='master', submodules=True)
+    version('1.10.2', tag='v1.10.2', submodules=True)
+    version('1.10.1', tag='v1.10.1', submodules=True)
+    version('1.10.0', tag='v1.10.0', submodules=True)
+    version('1.9.1', tag='v1.9.1', submodules=True)
+    version('1.9.0', tag='v1.9.0', submodules=True)
+    version('1.8.2', tag='v1.8.2', submodules=True)
+    version('1.8.1', tag='v1.8.1', submodules=True)
+    version('1.8.0', tag='v1.8.0', submodules=True)
+    version('1.7.1', tag='v1.7.1', submodules=True)
+    version('1.7.0', tag='v1.7.0', submodules=True)
+    version('1.6.0', tag='v1.6.0', submodules=True)
+    version('1.5.1', tag='v1.5.1', submodules=True)
+    version('1.5.0', tag='v1.5.0', submodules=True)
+    version('1.4.1', tag='v1.4.1', submodules=True)
+    version('1.3.1', tag='v1.3.1', submodules=True)
+    version('1.3.0', tag='v1.3.0', submodules=True)
+    version('1.2.0', tag='v1.2.0', submodules=True)
+    version('1.1.0', tag='v1.1.0', submodules=True)
+    version('1.0.1', tag='v1.0.1', submodules=True, deprecated=True)
+    version('1.0.0', tag='v1.0.0', submodules=True, deprecated=True)
+
+    is_darwin = sys.platform == 'darwin'
+
+    # All options are defined in CMakeLists.txt.
+    # Some are listed in setup.py, but not all.
+    variant('caffe2', default=True, description='Build Caffe2', when='@1.7:')
+    variant('test', default=False, description='Build C++ test binaries')
+    variant('cuda', default=not is_darwin, description='Use CUDA')
+    variant('rocm', default=False, description='Use ROCm')
+    variant('cudnn', default=not is_darwin, description='Use cuDNN', when='+cuda')
+    variant('fbgemm', default=True, description='Use FBGEMM (quantized 8-bit server operators)')
+    variant('kineto', default=True, description='Use Kineto profiling library', when='@1.8:')
+    variant('magma', default=not is_darwin, description='Use MAGMA', when='+cuda')
+    variant('metal', default=is_darwin, description='Use Metal for Caffe2 iOS build')
+    variant('nccl', default=True, description='Use NCCL', when='+cuda platform=linux')
+    variant('nccl', default=True, description='Use NCCL', when='+cuda platform=cray')
+    variant('nccl', default=True, description='Use NCCL', when='+rocm platform=linux')
+    variant('nccl', default=True, description='Use NCCL', when='+rocm platform=cray')
+    variant('nnpack', default=True, description='Use NNPACK')
+    variant('numa', default=True, description='Use NUMA', when='platform=linux')
+    variant('numa', default=True, description='Use NUMA', when='platform=cray')
+    variant('numpy', default=True, description='Use NumPy')
+    variant('openmp', default=True, description='Use OpenMP for parallel code')
+    variant('qnnpack', default=True, description='Use QNNPACK (quantized 8-bit operators)')
+    variant('valgrind', default=True, description='Use Valgrind', when='@1.8: platform=linux')
+    variant('valgrind', default=True, description='Use Valgrind', when='@1.8: platform=cray')
+    variant('xnnpack', default=True, description='Use XNNPACK', when='@1.5:')
+    variant('mkldnn', default=True, description='Use MKLDNN')
+    variant('distributed', default=not is_darwin, description='Use distributed')
+    variant('mpi', default=not is_darwin, description='Use MPI for Caffe2', when='+distributed')
+    variant('gloo', default=not is_darwin, description='Use Gloo', when='+distributed')
+    variant('tensorpipe', default=not is_darwin, description='Use TensorPipe', when='@1.6: +distributed')
+    variant('onnx_ml', default=True, description='Enable traditional ONNX ML API', when='@1.5:')
+    variant('breakpad', default=True, description='Enable breakpad crash dump library', when='@1.9:')
+
+    conflicts('+cuda+rocm')
+    conflicts('+breakpad', when='target=ppc64:')
+    conflicts('+breakpad', when='target=ppc64le:')
+
+    conflicts('cuda_arch=none', when='+cuda',
+              msg='Must specify CUDA compute capabilities of your GPU, see '
+              'https://developer.nvidia.com/cuda-gpus')
+
+    # Required dependencies
+    depends_on('cmake@3.5:', type='build')
+    # Use Ninja generator to speed up build times, automatically used if found
+    depends_on('ninja@1.5:', when='@1.1:', type='build')
+    # See python_min_version in setup.py
+    depends_on('python@3.6.2:', when='@1.7.1:', type=('build', 'link', 'run'))
+    depends_on('python@3.6.1:', when='@1.6:1.7.0', type=('build', 'link', 'run'))
+    depends_on('python@3.5:', when='@1.5', type=('build', 'link', 'run'))
+    depends_on('python@2.7:2,3.5:', when='@1.4', type=('build', 'link', 'run'))
+    depends_on('python@2.7:2,3.5:3.7', when='@:1.3', type=('build', 'link', 'run'))
+    depends_on('py-setuptools', type=('build', 'run'))
+    depends_on('py-future', when='@1.5:', type=('build', 'run'))
+    depends_on('py-future', when='@1.1: ^python@:2', type=('build', 'run'))
+    depends_on('py-pyyaml', type=('build', 'run'))
+    depends_on('py-typing', when='^python@:3.4', type=('build', 'run'))
+    depends_on('py-pybind11@2.6.2:', when='@1.8:', type=('build', 'link', 'run'))
+    depends_on('py-pybind11@2.3.0', when='@1.1:1.7', type=('build', 'link', 'run'))
+    depends_on('py-pybind11@2.2.4', when='@:1.0', type=('build', 'link', 'run'))
+    depends_on('py-dataclasses', when='@1.7: ^python@3.6', type=('build', 'run'))
+    depends_on('py-tqdm', type='run')
+    # https://github.com/onnx/onnx#prerequisites
+    depends_on('py-numpy@1.16.6:', type=('build', 'run'))
+    depends_on('py-protobuf@3.12.2:', when='@1.10:', type=('build', 'run'))
+    depends_on('py-protobuf@:3.14', when='@:1.9', type=('build', 'run'))
+    depends_on('protobuf@3.12.2:', when='@1.10:')
+    depends_on('protobuf@:3.14', when='@:1.9')
+    depends_on('py-typing-extensions@3.6.2.1:', when='@1.7:', type=('build', 'run'))
+    depends_on('blas')
+    depends_on('lapack')
+    depends_on('eigen')
+    # https://github.com/pytorch/pytorch/issues/60329
+    # depends_on('cpuinfo@2020-12-17', when='@1.8:')
+    # depends_on('cpuinfo@2020-06-11', when='@1.6:1.7')
+    # https://github.com/shibatch/sleef/issues/427
+    # depends_on('sleef@3.5.1_2020-12-22', when='@1.8:')
+    # https://github.com/pytorch/pytorch/issues/60334
+    # depends_on('sleef@3.4.0_2019-07-30', when='@1.6:1.7')
+    # https://github.com/Maratyszcza/FP16/issues/18
+    # depends_on('fp16@2020-05-14', when='@1.6:')
+    depends_on('pthreadpool@2021-04-13', when='@1.9:')
+    depends_on('pthreadpool@2020-10-05', when='@1.8')
+    depends_on('pthreadpool@2020-06-15', when='@1.6:1.7')
+    depends_on('psimd@2020-05-17', when='@1.6:')
+    depends_on('fxdiv@2020-04-17', when='@1.6:')
+    depends_on('benchmark', when='@1.6:+test')
+
+    # Optional dependencies
+    # https://discuss.pytorch.org/t/compiling-1-10-1-from-source-with-gcc-11-and-cuda-11-5/140971
+    depends_on('cuda@9.2:', when='@1.11:+cuda', type=('build', 'link', 'run'))
+    depends_on('cuda@9.2:11.4', when='@1.6:1.10+cuda', type=('build', 'link', 'run'))
+    depends_on('cuda@9:11.4', when='@1.1:1.5+cuda', type=('build', 'link', 'run'))
+    depends_on('cuda@7.5:11.4', when='@:1.0+cuda', type=('build', 'link', 'run'))
+    depends_on('cudnn@6:7', when='@:1.0+cudnn')
+    depends_on('cudnn@7.0:7', when='@1.1:1.5+cudnn')
+    depends_on('cudnn@7:', when='@1.6:+cudnn')
+    depends_on('magma', when='+magma')
+    depends_on('nccl', when='+nccl')
+    depends_on('numactl', when='+numa')
+    depends_on('llvm-openmp', when='%apple-clang +openmp')
+    depends_on('valgrind', when='+valgrind')
+    # https://github.com/pytorch/pytorch/issues/60332
+    # depends_on('xnnpack@2021-02-22', when='@1.8:+xnnpack')
+    # depends_on('xnnpack@2020-03-23', when='@1.6:1.7+xnnpack')
+    depends_on('mpi', when='+mpi')
+    # https://github.com/pytorch/pytorch/issues/60270
+    # depends_on('gloo@2021-05-04', when='@1.9:+gloo')
+    # depends_on('gloo@2020-09-18', when='@1.7:1.8+gloo')
+    # depends_on('gloo@2020-03-17', when='@1.6+gloo')
+    # https://github.com/pytorch/pytorch/issues/60331
+    # depends_on('onnx@1.8.0_2020-11-03', when='@1.8:+onnx_ml')
+    # depends_on('onnx@1.7.0_2020-05-31', when='@1.6:1.7+onnx_ml')
+    depends_on('mkl', when='+mkldnn')
+
+    # Test dependencies
+    depends_on('py-hypothesis', type='test')
+    depends_on('py-six', type='test')
+    depends_on('py-psutil', type='test')
+
+    # Fix BLAS being overridden by MKL
+    # https://github.com/pytorch/pytorch/issues/60328
+    patch('https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/59220.patch',
+          sha256='e37afffe45cf7594c22050109942370e49983ad772d12ebccf508377dc9dcfc9',
+          when='@1.2:')
+
+    # Fixes build on older systems with glibc <2.12
+    patch('https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/55063.patch',
+          sha256='e17eaa42f5d7c18bf0d7c37d7b0910127a01ad53fdce3e226a92893356a70395',
+          when='@1.1:1.8.1')
+
+    # Fixes CMake configuration error when XNNPACK is disabled
+    # https://github.com/pytorch/pytorch/pull/35607
+    # https://github.com/pytorch/pytorch/pull/37865
+    patch('xnnpack.patch', when='@1.5')
+
+    # Fixes build error when ROCm is enabled for pytorch-1.5 release
+    patch('rocm.patch', when='@1.5+rocm')
+
+    # Fixes fatal error: sleef.h: No such file or directory
+    # https://github.com/pytorch/pytorch/pull/35359
+    # https://github.com/pytorch/pytorch/issues/26555
+    # patch('sleef.patch', when='@:1.5')
+
+    # Fixes compilation with Clang 9.0.0 and Apple Clang 11.0.3
+    # https://github.com/pytorch/pytorch/pull/37086
+    patch('https://github.com/pytorch/pytorch/commit/e921cd222a8fbeabf5a3e74e83e0d8dfb01aa8b5.patch',
+          sha256='17561b16cd2db22f10c0fe1fdcb428aecb0ac3964ba022a41343a6bb8cba7049',
+          when='@1.1:1.5')
+
+    # Removes duplicate definition of cusparseGetErrorString
+    # https://github.com/pytorch/pytorch/issues/32083
+    patch('cusparseGetErrorString.patch', when='@:1.0^cuda@10.1.243:')
+
+    # Fixes 'FindOpenMP.cmake'
+    # to detect openmp settings used by Fujitsu compiler.
+    patch('detect_omp_of_fujitsu_compiler.patch', when='%fj')
+
+    # Fix compilation of +distributed~tensorpipe
+    # https://github.com/pytorch/pytorch/issues/68002
+    patch('https://github.com/pytorch/pytorch/commit/c075f0f633fa0136e68f0a455b5b74d7b500865c.patch',
+          sha256='e69e41b5c171bfb00d1b5d4ee55dd5e4c8975483230274af4ab461acd37e40b8', when='@1.10.0+distributed~tensorpipe')
+
+    @property
+    def libs(self):
+        """Find ``libtorch`` in the ``torch/lib`` directory under the Python platlib."""
+        # TODO: why doesn't `python_platlib` work here?
+        platlib = self.spec['python'].package.platlib
+        lib_dir = join_path(self.prefix, platlib, 'torch', 'lib')
+        return find_libraries('libtorch', lib_dir)
+
+    @property
+    def headers(self):
+        """Collect every header under ``torch/include`` in the Python platlib."""
+        # TODO: why doesn't `python_platlib` work here?
+        platlib = self.spec['python'].package.platlib
+        include_dir = join_path(self.prefix, platlib, 'torch', 'include')
+        header_list = find_all_headers(include_dir)
+        header_list.directories = [include_dir]
+        return header_list
+
+    @when('@1.5.0:')
+    def patch(self):
+        # Switch the torch_global_deps linker language from C to CXX, see
+        # https://github.com/pytorch/pytorch/issues/52208
+        linker_prop = 'torch_global_deps PROPERTIES LINKER_LANGUAGE '
+        filter_file(linker_prop + 'C', linker_prop + 'CXX', 'caffe2/CMakeLists.txt')
+
+    def setup_build_environment(self, env):
+        """Set environment variables used to control the build.
+
+        PyTorch's ``setup.py`` is a thin wrapper around ``cmake``.
+        In ``tools/setup_helpers/cmake.py``, you can see that all
+        environment variables that start with ``BUILD_``, ``USE_``,
+        or ``CMAKE_``, plus a few more explicitly specified variable
+        names, are passed directly to the ``cmake`` call. Therefore,
+        most flags defined in ``CMakeLists.txt`` can be specified as
+        environment variables.
+        """
+        def enable_or_disable(variant, keyword='USE', var=None, newer=False):
+            """Set environment variable to enable or disable support for a
+            particular variant.
+
+            Parameters:
+                variant (str): the variant to check; nothing is set if it is absent
+                keyword (str): variable prefix, e.g. USE or BUILD (unused for NO_*)
+                var (str): CMake variable to set. Defaults to variant.upper()
+                newer (bool): True for newer variants that never used NO_*
+            """
+            if var is None:
+                var = variant.upper()
+
+            # Version 1.1.0 switched from NO_* to USE_* or BUILD_*
+            # But some newer variants have always used USE_* or BUILD_*
+            if self.spec.satisfies('@1.1:') or newer:
+                if '+' + variant in self.spec:
+                    env.set(keyword + '_' + var, 'ON')
+                elif '~' + variant in self.spec:
+                    env.set(keyword + '_' + var, 'OFF')
+            else:  # legacy scheme: NO_<var>=ON disables, unsetting NO_<var> enables
+                if '+' + variant in self.spec:
+                    env.unset('NO_' + var)
+                elif '~' + variant in self.spec:
+                    env.set('NO_' + var, 'ON')
+
+        # Build in parallel to speed up build times
+        env.set('MAX_JOBS', make_jobs)
+
+        # Spack logs have trouble handling colored output
+        env.set('COLORIZE_OUTPUT', 'OFF')
+
+        enable_or_disable('test', keyword='BUILD')
+        enable_or_disable('caffe2', keyword='BUILD')
+
+        enable_or_disable('cuda')
+        if '+cuda' in self.spec:
+            # cmake/public/cuda.cmake
+            # cmake/Modules_CUDA_fix/upstream/FindCUDA.cmake
+            env.unset('CUDA_ROOT')
+            torch_cuda_arch = ';'.join('{0:.1f}'.format(float(i) / 10.0) for i
+                                       in
+                                       self.spec.variants['cuda_arch'].value)
+            env.set('TORCH_CUDA_ARCH_LIST', torch_cuda_arch)  # e.g. ('70', '75') -> '7.0;7.5'
+
+        enable_or_disable('rocm')
+
+        enable_or_disable('cudnn')
+        if '+cudnn' in self.spec:
+            # cmake/Modules_CUDA_fix/FindCUDNN.cmake
+            env.set('CUDNN_INCLUDE_DIR', self.spec['cudnn'].prefix.include)
+            env.set('CUDNN_LIBRARY', self.spec['cudnn'].libs[0])
+
+        enable_or_disable('fbgemm')
+        enable_or_disable('kineto')
+        enable_or_disable('magma')
+        enable_or_disable('metal')
+        enable_or_disable('breakpad')
+
+        enable_or_disable('nccl')
+        if '+nccl' in self.spec:
+            env.set('NCCL_LIB_DIR', self.spec['nccl'].libs.directories[0])
+            env.set('NCCL_INCLUDE_DIR', self.spec['nccl'].prefix.include)
+
+        # cmake/External/nnpack.cmake
+        enable_or_disable('nnpack')
+
+        enable_or_disable('numa')
+        if '+numa' in self.spec:
+            # cmake/Modules/FindNuma.cmake
+            env.set('NUMA_ROOT_DIR', self.spec['numactl'].prefix)
+
+        # cmake/Modules/FindNumPy.cmake
+        enable_or_disable('numpy')
+        # cmake/Modules/FindOpenMP.cmake
+        enable_or_disable('openmp', newer=True)
+        enable_or_disable('qnnpack')
+        enable_or_disable('qnnpack', var='PYTORCH_QNNPACK')  # second flag, same variant
+        enable_or_disable('valgrind')
+        enable_or_disable('xnnpack')
+        enable_or_disable('mkldnn')
+        enable_or_disable('distributed')
+        enable_or_disable('mpi')
+        # cmake/Modules/FindGloo.cmake
+        enable_or_disable('gloo', newer=True)
+        enable_or_disable('tensorpipe')
+
+        if '+onnx_ml' in self.spec:
+            env.set('ONNX_ML', 'ON')
+        elif '~onnx_ml' in self.spec:
+            env.set('ONNX_ML', 'OFF')
+
+        if not self.spec.satisfies('@master'):
+            env.set('PYTORCH_BUILD_VERSION', self.version)
+            env.set('PYTORCH_BUILD_NUMBER', 0)
+
+        # BLAS to be used by Caffe2
+        # Options defined in cmake/Dependencies.cmake and cmake/Modules/FindBLAS.cmake
+        if self.spec['blas'].name == 'atlas':
+            env.set('BLAS', 'ATLAS')
+            env.set('WITH_BLAS', 'atlas')
+        elif self.spec['blas'].name in ['blis', 'amdblis']:
+            env.set('BLAS', 'BLIS')
+            env.set('WITH_BLAS', 'blis')
+        elif self.spec['blas'].name == 'eigen':
+            env.set('BLAS', 'Eigen')
+        elif self.spec['lapack'].name in ['libflame', 'amdlibflame']:  # note: LAPACK provider
+            env.set('BLAS', 'FLAME')
+            env.set('WITH_BLAS', 'FLAME')
+        elif self.spec['blas'].name in [
+                'intel-mkl', 'intel-parallel-studio', 'intel-oneapi-mkl']:
+            env.set('BLAS', 'MKL')
+            env.set('WITH_BLAS', 'mkl')
+        elif self.spec['blas'].name == 'openblas':
+            env.set('BLAS', 'OpenBLAS')
+            env.set('WITH_BLAS', 'open')
+        elif self.spec['blas'].name == 'veclibfort':
+            env.set('BLAS', 'vecLib')
+            env.set('WITH_BLAS', 'veclib')
+        else:
+            env.set('BLAS', 'Generic')
+            env.set('WITH_BLAS', 'generic')
+
+        # Don't use vendored third-party libraries when possible
+        env.set('BUILD_CUSTOM_PROTOBUF', 'OFF')
+        env.set('USE_SYSTEM_NCCL', 'ON')
+        env.set('USE_SYSTEM_EIGEN_INSTALL', 'ON')
+        env.set('pybind11_DIR', self.spec['py-pybind11'].prefix)
+        env.set('pybind11_INCLUDE_DIR',
+                self.spec['py-pybind11'].prefix.include)
+        if self.spec.satisfies('@1.10:'):
+            env.set('USE_SYSTEM_PYBIND11', 'ON')
+        # https://github.com/pytorch/pytorch/issues/60334
+        # if self.spec.satisfies('@1.8:'):
+        #     env.set('USE_SYSTEM_SLEEF', 'ON')
+        if self.spec.satisfies('@1.6:'):
+            # env.set('USE_SYSTEM_LIBS', 'ON')
+            # https://github.com/pytorch/pytorch/issues/60329
+            # env.set('USE_SYSTEM_CPUINFO', 'ON')
+            # https://github.com/pytorch/pytorch/issues/60270
+            # env.set('USE_SYSTEM_GLOO', 'ON')
+            # https://github.com/Maratyszcza/FP16/issues/18
+            # env.set('USE_SYSTEM_FP16', 'ON')
+            env.set('USE_SYSTEM_PTHREADPOOL', 'ON')
+            env.set('USE_SYSTEM_PSIMD', 'ON')
+            env.set('USE_SYSTEM_FXDIV', 'ON')
+            env.set('USE_SYSTEM_BENCHMARK', 'ON')
+            # https://github.com/pytorch/pytorch/issues/60331
+            # env.set('USE_SYSTEM_ONNX', 'ON')
+            # https://github.com/pytorch/pytorch/issues/60332
+            # env.set('USE_SYSTEM_XNNPACK', 'ON')
+
+    @run_before('install')
+    def build_amd(self):
+        if '+rocm' in self.spec:  # only ROCm builds run the script below
+            python(os.path.join('tools', 'amd_build', 'build_amd.py'))
+
+    @run_after('install')
+    @on_package_attributes(run_tests=True)
+    def install_test(self):
+        with working_dir('test'):  # run_test.py is invoked from inside 'test'
+            python('run_test.py')
diff --git a/packages/py-torch/rocm.patch b/packages/py-torch/rocm.patch
new file mode 100644
index 0000000000000000000000000000000000000000..b50cc7e1598a23f41e1e1a73e6672e6a4d132b6a
--- /dev/null
+++ b/packages/py-torch/rocm.patch
@@ -0,0 +1,98 @@
+diff --git a/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h b/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
+index 9cd678dfb4cc7..4630465115c7c 100644
+--- a/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
++++ b/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
+@@ -67,6 +67,14 @@ namespace at { namespace cuda {
+ //
+ // HIP doesn't have
+ //   cuGetErrorString  (maps to non-functional hipGetErrorString___)
++//
++// HIP from ROCm 3.5 on renamed hipOccupancyMaxActiveBlocksPerMultiprocessor
++// to hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.
++#if HIP_VERSION < 305
++#define HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR hipOccupancyMaxActiveBlocksPerMultiprocessor
++#else
++#define HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR cuOccupancyMaxActiveBlocksPerMultiprocessor
++#endif
+ 
+ #define AT_FORALL_NVRTC(_)                       \
+   _(nvrtcVersion)                                \
+@@ -76,7 +84,7 @@ namespace at { namespace cuda {
+   _(nvrtcGetPTX)                                 \
+   _(cuModuleLoadData)                            \
+   _(cuModuleGetFunction)                         \
+-  _(cuOccupancyMaxActiveBlocksPerMultiprocessor) \
++  _(HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR)\
+   _(nvrtcGetErrorString)                         \
+   _(nvrtcGetProgramLogSize)                      \
+   _(nvrtcGetProgramLog)                          \
+diff --git a/aten/src/ATen/native/cuda/SoftMax.cu b/aten/src/ATen/native/cuda/SoftMax.cu
+index da1995123ecfc..f935eb4ef3d0e 100644
+--- a/aten/src/ATen/native/cuda/SoftMax.cu
++++ b/aten/src/ATen/native/cuda/SoftMax.cu
+@@ -127,8 +127,8 @@ void SpatialSoftMax_getLaunchSizes(
+   uint32_t block_threads = block.x * block.y;
+   smem_size = block.x == 1 ? 0 : block_threads * sizeof(accscalar_t);
+   int max_active_blocks;
+-#ifdef __HIP_PLATFORM_HCC__
+-  // XXX HIP function signature is not compatible yet.
++#if defined(__HIP_PLATFORM_HCC__) && HIP_VERSION < 305
++  // HIP function signature is not compatible yet.
+   uint32_t max_blocks;
+   cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks,
+                                                 k, block_threads, smem_size);
+diff --git a/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp b/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
+index 5586e49919727..27315ee475277 100644
+--- a/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
++++ b/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
+@@ -140,10 +140,10 @@ FusedKernelCUDA::FusedKernelCUDA(
+       nvrtc().cuModuleGetFunction(&function_, module_, name_.c_str()));
+ 
+   // Computes max blocks
+-#ifdef __HIP_PLATFORM_HCC__
+-  // XXX HIP function signature is not compatible yet
++#if defined(__HIP_PLATFORM_HCC__) && HIP_VERSION < 305
++  // HIP function signature is not compatible yet
+   uint32_t max_blocks;
+-  AT_CUDA_DRIVER_CHECK(nvrtc().cuOccupancyMaxActiveBlocksPerMultiprocessor(
++  AT_CUDA_DRIVER_CHECK(nvrtc().hipOccupancyMaxActiveBlocksPerMultiprocessor(
+       &max_blocks, function_, 128, 0));
+   maxBlocks_ = max_blocks;
+ #else
+diff --git a/torch/utils/hipify/cuda_to_hip_mappings.py b/torch/utils/hipify/cuda_to_hip_mappings.py
+index 7e21363cbe6af..26f269d92ae38 100644
+--- a/torch/utils/hipify/cuda_to_hip_mappings.py
++++ b/torch/utils/hipify/cuda_to_hip_mappings.py
+@@ -2890,7 +2890,7 @@
+         (
+             "cuOccupancyMaxActiveBlocksPerMultiprocessor",
+             (
+-                "hipOccupancyMaxActiveBlocksPerMultiprocessor",
++                "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor",
+                 CONV_OCCUPANCY,
+                 API_DRIVER,
+             ),
+@@ -2898,7 +2898,7 @@
+         (
+             "cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
+             (
+-                "hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
++                "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
+                 CONV_OCCUPANCY,
+                 API_DRIVER,
+                 HIP_UNSUPPORTED,
+@@ -2906,12 +2906,12 @@
+         ),
+         (
+             "cuOccupancyMaxPotentialBlockSize",
+-            ("hipOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER),
++            ("hipModuleOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER),
+         ),
+         (
+             "cuOccupancyMaxPotentialBlockSizeWithFlags",
+             (
+-                "hipOccupancyMaxPotentialBlockSizeWithFlags",
++                "hipModuleOccupancyMaxPotentialBlockSizeWithFlags",
+                 CONV_OCCUPANCY,
+                 API_DRIVER,
+                 HIP_UNSUPPORTED,
diff --git a/packages/py-torch/sleef.patch b/packages/py-torch/sleef.patch
new file mode 100644
index 0000000000000000000000000000000000000000..67f0234162d1a1af29aa5c538b0e585c3261a81e
--- /dev/null
+++ b/packages/py-torch/sleef.patch
@@ -0,0 +1,12 @@
+diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
+index 8025a7de3c..2e5cdbb5c9 100644
+--- a/caffe2/CMakeLists.txt
++++ b/caffe2/CMakeLists.txt
+@@ -1232,6 +1232,7 @@ if (BUILD_TEST)
+     add_executable(${test_name} "${test_src}")
+     target_link_libraries(${test_name} ${Caffe2_MAIN_LIBS} gtest_main)
+     target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
++    target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
+     target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
+     add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
+     if (INSTALL_TEST)
diff --git a/packages/py-torch/xnnpack.patch b/packages/py-torch/xnnpack.patch
new file mode 100644
index 0000000000000000000000000000000000000000..154033081e7ff91867e9a043a93c46b888bfe8cb
--- /dev/null
+++ b/packages/py-torch/xnnpack.patch
@@ -0,0 +1,47 @@
+diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
+index 8025a7de3c..0da37079d6 100644
+--- a/caffe2/CMakeLists.txt
++++ b/caffe2/CMakeLists.txt
+@@ -46,12 +46,19 @@ if (INTERN_BUILD_ATEN_OPS)
+   list(APPEND Caffe2_DEPENDENCY_INCLUDE ${ATen_THIRD_PARTY_INCLUDE})
+ endif()
+ 
++# {Q/X,etc} NPACK support is enabled by default, if none of these options
++# are selected, turn this flag ON to indicate the support is disabled
++set(NNPACK_AND_FAMILY_DISABLED OFF)
++if(NOT (USE_NNPACK OR USE_QNNPACK OR USE_PYTORCH_QNNPACK OR USE_XNNPACK))
++  set(NNPACK_AND_FAMILY_DISABLED ON)
++endif()
++
+ # ---[ Caffe2 build
+ # Note: the folders that are being commented out have not been properly
+ # addressed yet.
+ 
+ # For pthreadpool_new_if_impl. TODO: Remove when threadpools are unitied.
+-if (NOT MSVC)
++if (NOT MSVC AND NOT NNPACK_AND_FAMILY_DISABLED)
+   IF(NOT TARGET fxdiv)
+     SET(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
+     SET(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
+@@ -710,7 +717,7 @@ ELSEIF(USE_CUDA)
+ ENDIF()
+ 
+ 
+-if (NOT MSVC)
++if (NOT MSVC AND NOT NNPACK_AND_FAMILY_DISABLED)
+   TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
+ endif()
+ 
+diff --git a/caffe2/utils/CMakeLists.txt b/caffe2/utils/CMakeLists.txt
+index 27aabb1315..3c7845c67d 100644
+--- a/caffe2/utils/CMakeLists.txt
++++ b/caffe2/utils/CMakeLists.txt
+@@ -36,7 +36,7 @@ list(APPEND Caffe2_CPU_SRCS
+ # ---[ threadpool/pthreadpool* is a local modification of the NNPACK
+ # pthreadpool with a very similar interface. Neither NNPACK, nor this
+ # thread pool supports Windows.
+-if (NOT MSVC)
++if (NOT MSVC AND NOT NNPACK_AND_FAMILY_DISABLED)
+   add_definitions(-DUSE_INTERNAL_THREADPOOL_IMPL)
+   set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS}
+           utils/threadpool/pthreadpool.cc