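Add a non-trivial analytical test function to the GPU operator test, re-enable `reinit!` in `QuadratureValuesIterator`, rename type parameters to descriptive names, and other cleanups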

termi-official · Abdelrahman912 · Dec 18, 2024 · Dec 20, 2024 · Dec 21, 2024 · Jan 6, 2025
commit 7339d1c2ad65c4b9543ed7d17f8d2da79b249e05
diff --git a/ext/cuda/cuda_memalloc.jl b/ext/cuda/cuda_memalloc.jl
@@ -13,8 +13,7 @@ end
 
 
 function (dsf::DynamicSharedMemFunction{N, Tv, Ti})() where {N, Tv, Ti}
-    mem_size = dsf.mem_size
-    offset = dsf.offset
+    (; mem_size, offset) = dsf
     return CUDA.@cuDynamicSharedMem(Tv, mem_size, offset)
 end
 
@@ -69,17 +68,17 @@ end
 # Global Memory Allocation  #
 #############################
 
-struct KeFeGlobalMem{LOCAL_MATRICES, LOCAL_VECTORS} <: AbstractDeviceGlobalMem
-    Kes::LOCAL_MATRICES ## global level allocation (i.e. memory for all blocks -> 3rd order tensor)
-    fes::LOCAL_VECTORS  ## global level allocation (i.e. memory for all blocks -> 2nd order tensor)
+struct KeFeGlobalMem{MatricesType, VectorsType} <: AbstractDeviceGlobalMem
+    Kes::MatricesType ## global level allocation (i.e. memory for all blocks -> 3rd order tensor)
+    fes::VectorsType  ## global level allocation (i.e. memory for all blocks -> 2nd order tensor)
 end
 
-struct FeGlobalMem{LOCAL_VECTORS} <: AbstractDeviceGlobalMem
-    fes::LOCAL_VECTORS  ## global level allocation (i.e. memory for all blocks -> 2nd order tensor)
+struct FeGlobalMem{VectorsType} <: AbstractDeviceGlobalMem
+    fes::VectorsType  ## global level allocation (i.e. memory for all blocks -> 2nd order tensor)
 end
 
-struct KeGlobalMem{LOCAL_MATRICES} <: AbstractDeviceGlobalMem
-    Kes::LOCAL_MATRICES ## global level allocation (i.e. memory for all blocks -> 3rd order tensor)
+struct KeGlobalMem{MatricesType} <: AbstractDeviceGlobalMem
+    Kes::MatricesType ## global level allocation (i.e. memory for all blocks -> 3rd order tensor)
 end
 
 

diff --git a/src/ferrite-addons/PR883.jl b/src/ferrite-addons/PR883.jl
@@ -10,7 +10,7 @@ struct QuadratureValuesIterator{VT,XT}
         return new{V, Nothing}(v, nothing)
     end
     function QuadratureValuesIterator(v::V, cell_coords::VT) where {V, VT <: AbstractArray}
-        #reinit!(v, cell_coords)
+        reinit!(v, cell_coords)
         return new{V, VT}(v, cell_coords)
     end
 end
@@ -257,11 +257,7 @@ Ferrite.getnquadpoints(cv::StaticCellValues) = length(cv.weights)
 Ferrite.getnbasefunctions(cv::StaticCellValues) = getnbasefunctions(cv.fv)
 Ferrite.getngeobasefunctions(cv::StaticCellValues) = getnbasefunctions(cv.gm)
 
-@inline function Ferrite.reinit!(cv::StaticCellValues{<:Any, <:Any, <:AbstractVector}, cell_coords::AbstractVector)
-    copyto!(cv.x, cell_coords)
-    #TODO: Also allow the cell::AbstracCell to be given and updated
-end
-@inline function Ferrite.reinit!(::StaticCellValues{<:Any, <:Any, Nothing}, ::AbstractVector)
+@inline function Ferrite.reinit!(::StaticCellValues, ::AbstractVector)
     nothing # Nothing to do on reinit if x is not saved.
 end
 
@@ -277,7 +273,6 @@ end
     return _quadrature_point_values(fe_v, q_point, cell_coords, detJ -> -1)
 end
 
-
 function _quadrature_point_values(fe_v::StaticCellValues, q_point::Int, cell_coords::AbstractVector, neg_detJ_err_fun::Function)
     #q_point bounds checked, ok to use @inbounds
     @inbounds begin

diff --git a/src/ferrite-addons/gpu/device_dofhandler.jl b/src/ferrite-addons/gpu/device_dofhandler.jl
@@ -1,17 +1,17 @@
 
 # # Utility which holds partial information for assembly.
-struct DeviceSubDofHandler{Ti<:Integer,VEC_IP,IndexType, IndexVectorType <: AbstractVector{IndexType},DHDataType} <: Ferrite.AbstractDofHandler
+struct DeviceSubDofHandler{Ti<:Integer,IPVectorType,IndexType, IndexVectorType <: AbstractVector{IndexType},DHDataType} <: Ferrite.AbstractDofHandler
     cellset::IndexVectorType
     field_names::IndexVectorType
-    field_interpolations::VEC_IP 
+    field_interpolations::IPVectorType 
     ndofs_per_cell::Ti
     dh_data::DHDataType #DeviceDofHandlerData
 end
 
 
 # Utility which holds partial information for assembly.
-struct DeviceDofHandlerData{sdim, G<:Ferrite.AbstractGrid{sdim}, IndexType, IndexVectorType <: AbstractVector{IndexType},Ti<: Integer} <: Ferrite.AbstractDofHandler
-    grid::G
+struct DeviceDofHandlerData{sdim, GridType<:Ferrite.AbstractGrid{sdim}, IndexType, IndexVectorType <: AbstractVector{IndexType},Ti<: Integer} <: Ferrite.AbstractDofHandler
+    grid::GridType
     cell_dofs::IndexVectorType
     cell_dofs_offset::IndexVectorType
     cell_to_subdofhandler::IndexVectorType

diff --git a/src/ferrite-addons/gpu/device_grid.jl b/src/ferrite-addons/gpu/device_grid.jl
@@ -8,8 +8,8 @@ end
 function DeviceGrid(
     cells::CellDataType,
     nodes::NodeDataType
-) where {C <: Ferrite.AbstractCell, CellDataType <: AbstractArray{C, 1}, NodeDataType <: AbstractArray{Node{dim, T}}} where {dim, T}
-return DeviceGrid{dim,C,T, CellDataType, NodeDataType}(cells, nodes)
+) where {C<:Ferrite.AbstractCell,CellDataType<:AbstractArray{C,1},NodeDataType<:AbstractArray{Node{dim,T}}} where {dim,T}
+    return DeviceGrid{dim,C,T,CellDataType,NodeDataType}(cells, nodes)
 end
 
 Ferrite.get_coordinate_type(::DeviceGrid{sdim, <:Any, T,<:Any,<:Any}) where {sdim, T} = Vec{sdim, T} # Node is baked into the mesh type.

diff --git a/src/ferrite-addons/gpu/device_iterator.jl b/src/ferrite-addons/gpu/device_iterator.jl
@@ -8,10 +8,10 @@ abstract type AbstractDeviceCellIterator end
 ncells(iterator::AbstractDeviceCellIterator) = iterator.n_cells ## any subtype has to have `n_cells` field
 
 
-struct DeviceCellIterator{SDH <: DeviceSubDofHandler, Ti <: Integer, CellMem<: AbstractCellMem} <: AbstractDeviceCellIterator
-    sdh::SDH
+struct DeviceCellIterator{SDHType <: DeviceSubDofHandler, Ti <: Integer, CellMemType<: AbstractCellMem} <: AbstractDeviceCellIterator
+    sdh::SDHType
     n_cells::Ti # depends whether we are iterating over all cells (i.e. all the dh) or a subset of cells (i.e. subdh)
-    cell_mem::CellMem
+    cell_mem::CellMemType
 end
 
 struct DeviceOutOfBoundCellIterator <: AbstractDeviceCellIterator end  # used to handle the case for out of bound threads
@@ -24,12 +24,12 @@ struct DeviceOutOfBoundCellIterator <: AbstractDeviceCellIterator end  # used to
 
 abstract type AbstractDeviceCellCache  end
 
-struct DeviceCellCache{Ti <: Integer, DOFS <: AbstractVector{Ti}, NN, NODES <: SVector{NN, Ti}, X, COORDS <: SVector{X}, CellMem<: AbstractCellMem} <: AbstractDeviceCellCache
-    coords::COORDS
-    dofs::DOFS
+struct DeviceCellCache{Ti <: Integer, DofsType <: AbstractVector{Ti}, NN, NodesType <: SVector{NN, Ti}, X, CoordinatesType <: SVector{X}, CellMemType<: AbstractCellMem} <: AbstractDeviceCellCache
+    coords::CoordinatesType
+    dofs::DofsType
     cellid::Ti
-    nodes::NODES
-    cell_mem::CellMem
+    nodes::NodesType
+    cell_mem::CellMemType
 end
 
 @inline function cellke(::AbstractDeviceCellCache)

diff --git a/test/gpu/test_operators.jl b/test/gpu/test_operators.jl
@@ -10,14 +10,11 @@ cs = CartesianCoordinateSystem(grid)
 
 
 protocol = AnalyticalTransmembraneStimulationProtocol(
-                AnalyticalCoefficient((x,t) -> 1.f0, CoordinateSystemCoefficient(cs)),
+                AnalyticalCoefficient((x,t) -> cos(2π * t) * exp(-norm(x)^2), CoordinateSystemCoefficient(cs)), # cos, not sin: the operators are only evaluated at t = 0, where sin would make both right-hand sides identically zero and the CPU/GPU comparison vacuous
                 [SVector((0.f0, 1.f0))]
             )
 
 
-
-
-
 linop = Thunderbolt.LinearOperator(
     zeros(ndofs(dh)),
     protocol,
@@ -26,12 +23,11 @@ linop = Thunderbolt.LinearOperator(
 )
 
 Thunderbolt.update_operator!(linop,0.0)
-#@test linop.b ≈ [0.25, 0.5, 1.0, 0.5, 0.25, 0.5, 0.5, 0.25, 0.25]
 
 
 cuda_strategy = Thunderbolt.CudaAssemblyStrategy(Float32, Int32)
 cuda_op = Thunderbolt.init_linear_operator(cuda_strategy,protocol, qrc, dh);
-Thunderbolt.update_operator!(cuda_op,0.0)
+Thunderbolt.update_operator!(cuda_op,0.f0)
 
 
 @test Vector(cuda_op.b) ≈ linop.b