minor fix

termi-official · Abdelrahman912 · Dec 18, 2024 · Dec 20, 2024 · Dec 21, 2024 · Jan 6, 2025
commit ac9c80701356e1d73b2e4e98dc2ed71df77b1904
diff --git a/Project.toml b/Project.toml
@@ -33,7 +33,8 @@ UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
 Unrolled = "9602ed7d-8fef-5bc8-8597-8f21381861e8"
 WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192"
 
-
+[weakdeps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 
 [extensions]
 CuThunderboltExt = "CUDA"

diff --git a/ext/cuda/cuda_adapt.jl b/ext/cuda/cuda_adapt.jl
@@ -22,14 +22,7 @@ function Adapt.adapt_structure(to, cysc::CartesianCoordinateSystemCache)
     return CartesianCoordinateSystemCache(cs, cv)
 end
 
-function Adapt.adapt_structure(to, cv::CellValues)
-    fv = Adapt.adapt(to, StaticInterpolationValues(cv.fun_values))
-    gm = Adapt.adapt(to, StaticInterpolationValues(cv.geo_mapping))
-    n_quadoints = cv.qr.weights |> length
-    weights = Adapt.adapt(to, ntuple(i -> cv.qr.weights[i], n_quadoints))
-    return StaticCellValues(fv, gm, weights)
-end
-
+# TODO: not used in the current codebase
 function _convert_subdofhandler_to_gpu(cell_dofs, cell_dof_soffset, sdh::SubDofHandler)
     GPUSubDofHandler(
         cell_dofs,
@@ -41,10 +34,11 @@ function _convert_subdofhandler_to_gpu(cell_dofs, cell_dof_soffset, sdh::SubDofH
     )
 end
 
+# TODO: decide whether this `Adapt.adapt_structure` method for `DofHandler` belongs here or in ferrite-addons.
 function Adapt.adapt_structure(to, dh::DofHandler{sdim}) where sdim
     grid             = adapt_structure(to, dh.grid)
     # field_names      = Tuple(sym for sym in dh.field_names)
-    #IndexType        = eltype(dh.cell_dofs)\
+    #IndexType        = eltype(dh.cell_dofs)
     #IndexVectorType  = CuVector{IndexType}
     cell_dofs        = adapt(to, dh.cell_dofs .|> (i -> convert(Int32,i)) |> cu) # currently you cant create Dofhandler with Int32
     cell_dofs_offset = adapt(to, dh.cell_dofs_offset .|> (i -> convert(Int32,i)) |> cu)
@@ -58,17 +52,17 @@ function Adapt.adapt_structure(to, dh::DofHandler{sdim}) where sdim
         cell_dofs,
         cell_dofs_offset,
         cell_to_sdh,
-        dh.ndofs,
+        convert(Int32,dh.ndofs),
     )
     #return GPUDofHandler(dh, gpudata)
     return GPUDofHandler(gpudata)
 end
 
 
-
+# TODO: decide whether this `Adapt.adapt_structure` method for `Grid` belongs here or in ferrite-addons.
 function Adapt.adapt_structure(to, grid::Grid{sdim, cell_type, T}) where {sdim, cell_type, T}
     node_type = typeof(first(grid.nodes))
-    cells = Adapt.adapt_structure(to, grid.cells |> cu)
+    cells = Adapt.adapt_structure(to, grid.cells .|> (x -> Int32.(x.nodes)) .|> eltype(grid.cells) |> cu)
     nodes = Adapt.adapt_structure(to, grid.nodes |> cu)
     #TODO subdomain info
     return GPUGrid{sdim, cell_type, T, typeof(cells), typeof(nodes)}(cells, nodes)

diff --git a/ext/cuda/cuda_operator.jl b/ext/cuda/cuda_operator.jl
@@ -31,10 +31,10 @@ end
 
 function _init_linop_cuda(linop::LinearOperator)
     @unpack dh  = linop
-    n_cells = dh |> get_grid |> getncells |> Int32
+    n_cells = dh |> get_grid |> getncells |> (x -> convert(Int32, x))
     threads = convert(Int32, min(n_cells, 256))
     blocks = _calculate_nblocks(threads, n_cells)
-    n_basefuncs = ndofs_per_cell(dh) |> Int32
+    n_basefuncs = convert(Int32,ndofs_per_cell(dh)) 
     eles_caches = _setup_caches(linop)
     mem_alloc = try_allocate_shared_mem(RHSObject{Float32}, threads, n_basefuncs)
     mem_alloc isa Nothing || return CudaOperatorKernel(linop, threads, blocks, mem_alloc,eles_caches)
@@ -91,12 +91,11 @@ function Thunderbolt.update_operator!(op_ker::CudaOperatorKernel, time)
 end
 
 
-
 function _update_linear_operator_kernel!(b, dh_, eles_caches,mem_alloc, time)
     dh = dh_.gpudata
     for sdh_idx in 1:length(dh.subdofhandlers)
         element_cache = eles_caches[sdh_idx]
-        for cell in CellIterator(dh,convert(Int32, sdh_idx) ,mem_alloc)
+        for cell in CellIterator(dh, convert(Int32,sdh_idx) ,mem_alloc)
             bₑ = cellfe(cell)
             assemble_element!(bₑ, cell, element_cache, time)
             dofs = celldofs(cell)
@@ -107,8 +106,3 @@ function _update_linear_operator_kernel!(b, dh_, eles_caches,mem_alloc, time)
     end
     return nothing
 end
-
-
-function Thunderbolt.test_ext(x::Float64)
-    println("Hello from the CUDA backend")
-end
diff --git a/src/Thunderbolt.jl b/src/Thunderbolt.jl
@@ -245,6 +245,5 @@ export
     PressureFieldBC,
     BendingSpringBC,
     RobinBC,
-    ConstantPressureBC,
-    test_ext
+    ConstantPressureBC
 end
diff --git a/src/ferrite-addons/PR913.jl b/src/ferrite-addons/PR913.jl
@@ -304,11 +304,11 @@ end
 
 Ferrite.CellIterator(dh::GPUDofHandlerData, buffer_alloc::AbstractGlobalMemAlloc) = _cell_iterator(dh, -1,  dh |> get_grid |> getncells |> Int32, buffer_alloc) ## iterate over all cells
 
-function Ferrite.CellIterator(dh::GPUDofHandlerData,sdh_idx::Integer, buffer_alloc::AbstractGlobalMemAlloc)
+function Ferrite.CellIterator(dh::GPUDofHandlerData,sdh_idx::Ti, buffer_alloc::AbstractGlobalMemAlloc) where {Ti <: Integer}
     ## iterate over all cells in the subdomain
     # check if the subdomain index is valid
     sdh_idx ∉ 1:length(dh.subdofhandlers) && return CudaOutOfBoundCellIterator()
-    n_cells = dh.subdofhandlers[sdh_idx].cellset |> length |> Int32 
+    n_cells = dh.subdofhandlers[sdh_idx].cellset |> length |> (x -> convert(Ti, x)) 
     return _cell_iterator(dh, sdh_idx,n_cells, buffer_alloc)
 end
 
@@ -319,7 +319,7 @@ function Ferrite.CellIterator(dh::GPUDofHandlerData,sdh_idx::Integer, buffer_all
     ## iterate over all cells in the subdomain
     # check if the subdomain index is valid
     sdh_idx ∉ 1:length(dh.subdofhandlers) && return CudaOutOfBoundCellIterator()
-    n_cells = dh.subdofhandlers[sdh_idx].cellset |> length |> Int32 
+    n_cells = dh.subdofhandlers[sdh_idx].cellset |> length |> (x -> convert(typeof(dh.ndofs), x))  
     return _cell_iterator(dh, sdh_idx,n_cells, buffer_alloc)
 end
 
@@ -353,7 +353,7 @@ struct GPUCellCache{Ti <: Integer, DOFS <: AbstractVector{Ti}, NN, NODES <: SVec
 end
 
 
-function _makecache(iterator::AbstractCUDACellIterator, e::Integer)
+function _makecache(iterator::AbstractCUDACellIterator, e::Ti) where {Ti <: Integer}
     dh = iterator.dh
     grid = iterator.grid
     sdh_idx = iterator.sdh_idx
@@ -362,7 +362,7 @@ function _makecache(iterator::AbstractCUDACellIterator, e::Integer)
     cell = Ferrite.getcells(grid, e)
 
     # Extract the node IDs of the cell.
-    nodes = SVector(convert.(Int32, Ferrite.get_node_ids(cell))...)
+    nodes = SVector(convert.(Ti, Ferrite.get_node_ids(cell))...)
 
     # Extract the degrees of freedom for the cell.
     dofs = Ferrite.celldofs(dh, e)

diff --git a/src/gpu/gpu_operator.jl b/src/gpu/gpu_operator.jl
@@ -20,7 +20,3 @@ function update_operator!(::AbstractOperatorKernel, time)
     error("Not implemented")
 end
 
-
-function test_ext(x::AbstractFloat)
-    error("Not implemented")
-end
diff --git a/test/gpu/operators-test.jl b/test/gpu/operators-test.jl
@@ -1,12 +1,12 @@
-## TODO: Put test operators here or with cpu operators? 
 using Thunderbolt
 using CUDA
 using Test
 using StaticArrays
 
 
-
-grid = generate_grid(Quadrilateral, (2,2))
+left = Tensor{1, 2, Float32}((-1.0, -1.0)) # define the left bottom corner of the grid.
+right = Tensor{1, 2, Float32}((1.0, 1.0)) # define the right top corner of the grid.
+grid = generate_grid(Quadrilateral, (2,2),left,right)
 dh = DofHandler(grid)
 add!(dh, :u, Lagrange{RefQuadrilateral,1}())
 close!(dh)
@@ -19,8 +19,8 @@ propertynames(dh)
 
 
 protocol = AnalyticalTransmembraneStimulationProtocol(
-                AnalyticalCoefficient((x,t) -> 1.0, CoordinateSystemCoefficient(cs)),
-                [SVector((0.0, 1.0))]
+                AnalyticalCoefficient((x,t) -> 1.f0, CoordinateSystemCoefficient(cs)),
+                [SVector((0.f0, 1.f0))]
             )
 
 
@@ -43,7 +43,6 @@ cuda_op = Thunderbolt.init_linear_operator(CUDABackend,protocol, qrc, dh);
 Thunderbolt.update_operator!(cuda_op,0.0)
 
 
-
 @test Vector(cuda_op.op.b) ≈ linop.b