Skip to content
2 changes: 1 addition & 1 deletion .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ steps:
rocm: "*"
rocmgpu: "*"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 60
timeout_in_minutes: 90
matrix:
setup:
julia:
Expand Down
6 changes: 5 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"

[weakdeps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
Expand All @@ -27,6 +28,7 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[extensions]
TensorKitAdaptExt = "Adapt"
TensorKitAMDGPUExt = "AMDGPU"
TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
TensorKitChainRulesCoreExt = "ChainRulesCore"
TensorKitFiniteDifferencesExt = "FiniteDifferences"
Expand All @@ -35,6 +37,7 @@ TensorKitMooncakeExt = "Mooncake"
[compat]
Adapt = "4"
AllocCheck = "0.2.3"
AMDGPU = "2"
Aqua = "0.6, 0.7, 0.8"
ArgParse = "1.2.0"
CUDA = "5.9"
Expand Down Expand Up @@ -67,6 +70,7 @@ julia = "1.10"
[extras]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Expand All @@ -86,4 +90,4 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[targets]
test = ["ArgParse", "Adapt", "Aqua", "AllocCheck", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote", "Mooncake", "JET"]
test = ["ArgParse", "Adapt", "Aqua", "AllocCheck", "Combinatorics", "AMDGPU", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote", "Mooncake", "JET"]
20 changes: 20 additions & 0 deletions ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
module TensorKitAMDGPUExt

# AMDGPU backend extension for TensorKit: defines `TensorMap`s backed by
# `ROCVector` device storage (implementation in roctensormap.jl).

using AMDGPU, AMDGPU.rocBLAS, AMDGPU.rocSOLVER, LinearAlgebra
using AMDGPU: @allowscalar
# Rename AMDGPU's device RNG entry points so they do not clash with the
# `rand`/`randn` names imported from TensorKit below.
import AMDGPU: rand as rocrand, rand! as rocrand!, randn as rocrandn, randn! as rocrandn!

using TensorKit
using TensorKit.Factorizations
using TensorKit.Strided
using TensorKit.Factorizations: AbstractAlgorithm
# Internal TensorKit names used by the ROC tensor-map implementation.
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype, project_symmetric_and_check
import TensorKit: randisometry, rand, randn

using TensorKit: MatrixAlgebraKit

using Random

include("roctensormap.jl")

end
166 changes: 166 additions & 0 deletions ext/TensorKitAMDGPUExt/roctensormap.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# Type aliases for tensor maps whose data lives in AMDGPU device memory:
# `ROCTensorMap` pins the storage type of `TensorMap` to a `ROCVector` with a
# standard HIP device buffer.
const ROCTensorMap{T, S, N₁, N₂} = TensorMap{T, S, N₁, N₂, ROCVector{T, AMDGPU.Mem.HIPBuffer}}
# A tensor is a tensor map with trivial (zero-index) domain.
const ROCTensor{T, S, N} = ROCTensorMap{T, S, N, 0}

# Lazy adjoint wrapper around a device-backed tensor map.
const AdjointROCTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, ROCTensorMap{T, S, N₁, N₂}}

"""
    ROCTensorMap(t::TensorMap)

Upload a host-backed `TensorMap` to the GPU, returning a tensor map over the
same space whose data is stored in a `ROCArray`.
"""
function ROCTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A}
    devicedata = ROCArray{T}(t.data)
    return ROCTensorMap{T, S, N₁, N₂}(devicedata, space(t))
end

# project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy.
# Builds a Vector-backed tensor, projects the raw host data onto the symmetric
# subspace, verifies the projection did not change the data (up to `tol`), and
# finally uploads the result into device storage of type `A`.
function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: ROCVector{T}}
    # host-side scratch tensor with plain Vector storage
    h_t = TensorKit.TensorMapWithStorage{T, Vector{T}}(undef, V)
    h_t = TensorKit.project_symmetric!(h_t, Array(data))
    # verify result: the input must already lie in the symmetric subspace
    isapprox(Array(reshape(data, dims(h_t))), convert(Array, h_t); atol = tol) ||
        throw(ArgumentError("Data has non-zero elements at incompatible positions"))
    # copy the projected host data into device storage
    return TensorKit.TensorMapWithStorage{T, A}(A(h_t.data), V)
end

# `AMDGPU.zeros` / `AMDGPU.ones` constructors for tensor maps, mirroring the
# corresponding `Base` methods in TensorKit but returning device-backed tensors.
for (constructor, element) in ((:zeros, :zero), (:ones, :one))
    @eval begin
        # funnel separate codomain/domain arguments into the `HomSpace` form
        function AMDGPU.$constructor(
                cod::TensorSpace{S},
                dom::TensorSpace{S} = one(cod)
            ) where {S <: IndexSpace}
            return AMDGPU.$constructor(cod ← dom)
        end
        function AMDGPU.$constructor(
                ::Type{T}, cod::TensorSpace{S},
                dom::TensorSpace{S} = one(cod)
            ) where {T, S <: IndexSpace}
            return AMDGPU.$constructor(T, cod ← dom)
        end
        # default element type is Float64
        AMDGPU.$constructor(V::TensorMapSpace) = AMDGPU.$constructor(Float64, V)
        # actual implementation: allocate uninitialized, then fill
        function AMDGPU.$constructor(::Type{T}, V::TensorMapSpace) where {T}
            t = ROCTensorMap{T}(undef, V)
            fill!(t, $element(T))
            return t
        end
    end
end

# Device RNG constructors: `rocrand`/`rocrandn` mirror TensorKit's `rand`/`randn`
# for ROC-backed tensor maps. The convenience methods below successively fill in
# a default `HomSpace` (from codomain/domain), element type (Float64), and RNG
# (`Random.default_rng()`), funnelling everything into one implementation method.
for randfun in (:rocrand, :rocrandn)
    randfun! = Symbol(randfun, :!)
    @eval begin
        # converting `codomain` and `domain` into `HomSpace`
        function $randfun(
                codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain),
            ) where {S <: IndexSpace}
            return $randfun(codomain ← domain)
        end
        function $randfun(
                ::Type{T}, codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain),
            ) where {T, S <: IndexSpace}
            return $randfun(T, codomain ← domain)
        end
        function $randfun(
                rng::Random.AbstractRNG, ::Type{T},
                codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain),
            ) where {T, S <: IndexSpace}
            return $randfun(rng, T, codomain ← domain)
        end

        # filling in default eltype
        $randfun(V::TensorMapSpace) = $randfun(Float64, V)
        function $randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
            return $randfun(rng, Float64, V)
        end

        # filling in default rng
        function $randfun(::Type{T}, V::TensorMapSpace) where {T}
            return $randfun(Random.default_rng(), T, V)
        end

        # implementation: allocate, then fill in place with the mutating variant
        function $randfun(
                rng::Random.AbstractRNG, ::Type{T},
                V::TensorMapSpace
            ) where {T}
            t = ROCTensorMap{T}(undef, V)
            $randfun!(rng, t)
            return t
        end

        # in-place variant: fill each symmetry block with random numbers
        function $randfun!(rng::Random.AbstractRNG, t::ROCTensorMap)
            for (_, b) in blocks(t)
                $randfun!(rng, b)
            end
            return t
        end
    end
end

# Scalar implementation
#-----------------------
# Extract the single entry of a rank-(0, 0) tensor map. Scalar indexing into
# device memory is explicitly permitted here via `@allowscalar`.
function TensorKit.scalar(t::ROCTensorMap{T, S, 0, 0}) where {T, S}
    nonzero = findall(!iszero, t.data)
    isempty(nonzero) && return zero(scalartype(t))
    return @allowscalar @inbounds t.data[only(nonzero)]
end

# Convert any tensor map over the same spaces into a `ROCTensorMap` with
# element type `T`; no-op when the type already matches.
function Base.convert(
        TT::Type{ROCTensorMap{T, S, N₁, N₂}},
        t::AbstractTensorMap{<:Any, S, N₁, N₂}
    ) where {T, S, N₁, N₂}
    typeof(t) === TT && return t
    dest = TT(undef, space(t))
    return copy!(dest, t)
end

# Blockwise positive-definiteness check for device-backed tensor maps.
function LinearAlgebra.isposdef(t::ROCTensorMap)
    domain(t) == codomain(t) ||
        throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same"))
    # positive definiteness is only meaningful for Euclidean inner products
    InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false
    for (_, b) in blocks(t)
        # do our own hermitian check
        MatrixAlgebraKit.ishermitian(b) || return false
        isposdef(Hermitian(b)) || return false
    end
    return true
end

# Promotion between two ROC tensor-map types over identical spaces:
# promote the scalar types and keep the space parameters.
function Base.promote_rule(
        ::Type{<:TT₁},
        ::Type{<:TT₂}
    ) where {
        S, N₁, N₂, T₁, T₂,
        TT₁ <: ROCTensorMap{T₁, S, N₁, N₂},
        TT₂ <: ROCTensorMap{T₂, S, N₁, N₂},
    }
    T = TensorKit.VectorInterface.promote_add(T₁, T₂)
    return ROCTensorMap{T, S, N₁, N₂}
end

# ROCTensorMap exponentiation:
"""
    TensorKit.exp!(t::ROCTensorMap)

Compute the matrix exponential of `t` in place, blockwise, and return `t`.

Requires `domain(t) == codomain(t)`. Currently restricted to hermitian
tensors (throws `ArgumentError` otherwise), since each block is exponentiated
through its `Hermitian` wrapper.
"""
function TensorKit.exp!(t::ROCTensorMap)
    # fixed grammar of the user-facing message ("exist" -> "exists");
    # guard style matches the rest of this file (`cond || throw/error`)
    domain(t) == codomain(t) ||
        error("Exponential of a tensor only exists when domain == codomain.")
    MatrixAlgebraKit.ishermitian(t) ||
        throw(ArgumentError("`exp!` is currently only supported on hermitian AMDGPU tensors"))
    for (_, b) in blocks(t)
        # exponentiate the block via its hermitian wrapper, writing back in place
        copy!(b, parent(Base.exp(Hermitian(b))))
    end
    return t
end

# functions that don't map ℝ to (a subset of) ℝ
# These always produce a complex-scalartype result tensor, since the output can
# be complex even for hermitian input (e.g. `sqrt`/`log` of a negative eigenvalue).
for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth)
    sf = string(f)
    @eval function Base.$f(t::ROCTensorMap)
        domain(t) == codomain(t) ||
            throw(SpaceMismatch("`$($sf)` of a tensor only exists when domain == codomain"))
        # evaluation goes through `Hermitian` wrappers, hence the hermitian restriction
        !MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`$($sf)` is currently only supported on hermitian AMDGPU tensors"))
        # widen to complex floating point for the result
        T = complex(float(scalartype(t)))
        tf = similar(t, T)
        for (c, b) in blocks(t)
            copy!(block(tf, c), parent($f(Hermitian(b))))
        end
        return tf
    end
end
Loading
Loading