WarpX
Loading...
Searching...
No Matches
ChargeDeposition.H
Go to the documentation of this file.
1/* Copyright 2019 Axel Huebl, Andrew Myers, David Grote, Maxence Thevenet
2 * Weiqun Zhang
3 *
4 * This file is part of WarpX.
5 *
6 * License: BSD-3-Clause-LBNL
7 */
8#ifndef WARPX_CHARGEDEPOSITION_H_
9#define WARPX_CHARGEDEPOSITION_H_
10
15#ifdef WARPX_DIM_RZ
16# include "Utils/WarpX_Complex.H"
17#endif
18
19#include <AMReX.H>
20
21/* \brief Perform charge deposition on a tile
22 * \param GetPosition A functor for returning the particle position.
23 * \param wp Pointer to array of particle weights.
24 * \param ion_lev Pointer to array of particle ionization level. This is
25 required to have the charge of each macroparticle
26 since q is a scalar. For non-ionizable species,
27 ion_lev is a null pointer.
28 * \param rho_fab FArrayBox of charge density, either full array or tile.
29 * \param np_to_deposit Number of particles for which current is deposited.
30 * \param dinv 3D cell size inverse
31 * \param xyzmin The lower bounds of the domain
32 * \param lo Index lower bounds of domain.
33 * \param q species charge.
34 * \param n_rz_azimuthal_modes Number of azimuthal modes when using RZ geometry.
35 */
36template <int depos_order>
38 const amrex::ParticleReal * const wp,
39 const int* ion_lev,
40 amrex::FArrayBox& rho_fab,
41 long np_to_deposit,
42 const amrex::XDim3 & dinv,
43 const amrex::XDim3 & xyzmin,
44 amrex::Dim3 lo,
45 amrex::Real q,
46 [[maybe_unused]] int n_rz_azimuthal_modes)
47{
48 using namespace amrex;
49
50 // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
51 // (do_ionization=1)
52 const bool do_ionization = ion_lev;
53
54 const amrex::Real invvol = dinv.x*dinv.y*dinv.z;
55
56 amrex::Array4<amrex::Real> const& rho_arr = rho_fab.array();
57 amrex::IntVect const rho_type = rho_fab.box().type();
58
59 constexpr int NODE = amrex::IndexType::NODE;
60 constexpr int CELL = amrex::IndexType::CELL;
61
62 // Loop over particles and deposit into rho_fab
64 np_to_deposit,
65 [=] AMREX_GPU_DEVICE (long ip) {
66 // --- Get particle quantities
67 amrex::Real wq = q*wp[ip]*invvol;
68 if (do_ionization){
69 wq *= ion_lev[ip];
70 }
71
72 amrex::ParticleReal xp, yp, zp;
73 GetPosition(ip, xp, yp, zp);
74
75 // --- Compute shape factors
76 Compute_shape_factor< depos_order > const compute_shape_factor;
77#if !defined(WARPX_DIM_1D_Z)
78 // x direction
79 // Get particle position in grid coordinates
80#if defined(WARPX_DIM_RZ)
81 const amrex::Real rp = std::sqrt(xp*xp + yp*yp);
82 const amrex::Real costheta = (rp > 0._rt ? xp/rp : 1._rt);
83 const amrex::Real sintheta = (rp > 0._rt ? yp/rp : 0._rt);
84 const Complex xy0 = Complex{costheta, sintheta};
85 const amrex::Real x = (rp - xyzmin.x)*dinv.x;
86#elif defined(WARPX_DIM_RCYLINDER)
87 const amrex::Real rp = std::sqrt(xp*xp + yp*yp);
88 const amrex::Real x = (rp - xyzmin.x)*dinv.x;
89#elif defined(WARPX_DIM_RSPHERE)
90 const amrex::Real rp = std::sqrt(xp*xp + yp*yp + zp*zp);
91 const amrex::Real x = (rp - xyzmin.x)*dinv.x;
92#else
93 const amrex::Real x = (xp - xyzmin.x)*dinv.x;
94#endif
95
96 // Compute shape factor along x
97 // i: leftmost grid point that the particle touches
98 amrex::Real sx[depos_order + 1] = {0._rt};
99 int i = 0;
100 if (rho_type[0] == NODE) {
101 i = compute_shape_factor(sx, x);
102 } else if (rho_type[0] == CELL) {
103 i = compute_shape_factor(sx, x - 0.5_rt);
104 }
105#endif // !defined(WARPX_DIM_1D_Z)
106#if defined(WARPX_DIM_3D)
107 // y direction
108 const amrex::Real y = (yp - xyzmin.y)*dinv.y;
109 amrex::Real sy[depos_order + 1] = {0._rt};
110 int j = 0;
111 if (rho_type[1] == NODE) {
112 j = compute_shape_factor(sy, y);
113 } else if (rho_type[1] == CELL) {
114 j = compute_shape_factor(sy, y - 0.5_rt);
115 }
116#endif
117#if !defined(WARPX_DIM_RCYLINDER) && !defined(WARPX_DIM_RSPHERE)
118 // z direction
119 const amrex::Real z = (zp - xyzmin.z)*dinv.z;
120 amrex::Real sz[depos_order + 1] = {0._rt};
121 int k = 0;
122 if (rho_type[WARPX_ZINDEX] == NODE) {
123 k = compute_shape_factor(sz, z);
124 } else if (rho_type[WARPX_ZINDEX] == CELL) {
125 k = compute_shape_factor(sz, z - 0.5_rt);
126 }
127#endif
128
129 // Deposit charge into rho_arr
130#if defined(WARPX_DIM_1D_Z)
131 for (int iz=0; iz<=depos_order; iz++){
133 &rho_arr(lo.x+k+iz, 0, 0, 0),
134 sz[iz]*wq);
135 }
136#elif defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RSPHERE)
137 for (int ix=0; ix<=depos_order; ix++){
139 &rho_arr(lo.x+i+ix, 0, 0, 0),
140 sx[ix]*wq);
141 }
142#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
143 for (int iz=0; iz<=depos_order; iz++){
144 for (int ix=0; ix<=depos_order; ix++){
146 &rho_arr(lo.x+i+ix, lo.y+k+iz, 0, 0),
147 sx[ix]*sz[iz]*wq);
148#if defined(WARPX_DIM_RZ)
149 Complex xy = xy0; // Throughout the following loop, xy takes the value e^{i m theta}
150 for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
151 // The factor 2 on the weighting comes from the normalization of the modes
152 amrex::Gpu::Atomic::AddNoRet( &rho_arr(lo.x+i+ix, lo.y+k+iz, 0, 2*imode-1), 2._rt*sx[ix]*sz[iz]*wq*xy.real());
153 amrex::Gpu::Atomic::AddNoRet( &rho_arr(lo.x+i+ix, lo.y+k+iz, 0, 2*imode ), 2._rt*sx[ix]*sz[iz]*wq*xy.imag());
154 xy = xy*xy0;
155 }
156#endif
157 }
158 }
159#elif defined(WARPX_DIM_3D)
160 for (int iz=0; iz<=depos_order; iz++){
161 for (int iy=0; iy<=depos_order; iy++){
162 for (int ix=0; ix<=depos_order; ix++){
164 &rho_arr(lo.x+i+ix, lo.y+j+iy, lo.z+k+iz),
165 sx[ix]*sy[iy]*sz[iz]*wq);
166 }
167 }
168 }
169#endif
170 }
171 );
172}
173
174/* \brief Perform charge deposition on a tile using shared memory
175 * \param GetPosition A functor for returning the particle position.
176 * \param wp Pointer to array of particle weights.
177 * \param ion_lev Pointer to array of particle ionization level. This is
178 required to have the charge of each macroparticle
179 since q is a scalar. For non-ionizable species,
180 ion_lev is a null pointer.
181 * \param rho_fab FArrayBox of charge density, either full array or tile.
182 * \param ix_type
183 * \param np_to_deposit Number of particles for which charge is deposited.
184 * \param dinv 3D cell size inverse
185 * \param xyzmin The lower bounds of the domain
186 * \param lo Index lower bounds of domain.
187 * \param q species charge.
188 * \param n_rz_azimuthal_modes Number of azimuthal modes when using RZ geometry.
189 * \param a_bins
190 * \param box
191 * \param geom
192 * \param a_tbox_max_size
193 * \param bin_size tile size to use for shared current deposition operations
194 */
195template <int depos_order>
197 const amrex::ParticleReal * const wp,
198 const int* ion_lev,
199 amrex::FArrayBox& rho_fab,
200 const amrex::IntVect& ix_type,
201 const long np_to_deposit,
202 const amrex::XDim3 & dinv,
203 const amrex::XDim3 & xyzmin,
204 const amrex::Dim3 lo,
205 const amrex::Real q,
206 [[maybe_unused]]const int n_rz_azimuthal_modes,
208 const amrex::Box& box,
209 const amrex::Geometry& geom,
210 const amrex::IntVect& a_tbox_max_size,
211 const amrex::IntVect bin_size
212 )
213{
214 using namespace amrex;
215
216 const auto *permutation = a_bins.permutationPtr();
217
218#if !defined(AMREX_USE_GPU)
219 amrex::ignore_unused(ix_type, a_bins, box, geom, a_tbox_max_size, bin_size);
220#endif
221
222 // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
223 // (do_ionization=1)
224 const bool do_ionization = ion_lev;
225
226 const amrex::Real invvol = dinv.x*dinv.y*dinv.z;
227
228 amrex::Array4<amrex::Real> const& rho_arr = rho_fab.array();
229 auto rho_box = rho_fab.box();
230 amrex::IntVect const rho_type = rho_box.type();
231
232 constexpr int NODE = amrex::IndexType::NODE;
233 constexpr int CELL = amrex::IndexType::CELL;
234
235 // Loop over particles and deposit into rho_fab
236#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
237 const auto plo = geom.ProbLoArray();
238 const auto domain = geom.Domain();
239
240 const amrex::Box sample_tbox(IntVect(AMREX_D_DECL(0, 0, 0)), a_tbox_max_size - 1);
241 amrex::Box sample_tbox_x = convert(sample_tbox, ix_type);
242
243 sample_tbox_x.grow(depos_order);
244
245 const auto npts = sample_tbox_x.numPts();
246
247 const int nblocks = a_bins.numBins();
248 const auto offsets_ptr = a_bins.offsetsPtr();
249 const int threads_per_block = 256;
250
251 std::size_t shared_mem_bytes = npts*sizeof(amrex::Real);
252
253 const std::size_t max_shared_mem_bytes = amrex::Gpu::Device::sharedMemPerBlock();
254
255 WARPX_ALWAYS_ASSERT_WITH_MESSAGE(shared_mem_bytes <= max_shared_mem_bytes,
256 "Tile size too big for GPU shared memory charge deposition");
257#endif
258
259#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
260 amrex::ignore_unused(np_to_deposit);
261 // Loop with one block per tile (the shared memory is allocated on a per-block basis)
262 // The threads within each block loop over the particles of its tile
263 // (Each threads processes a different set of particles.)
265 nblocks, threads_per_block, shared_mem_bytes, amrex::Gpu::gpuStream(),
266 [=] AMREX_GPU_DEVICE () noexcept
267#else // defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
268 amrex::ParallelFor(np_to_deposit, [=] AMREX_GPU_DEVICE (long ip_orig) noexcept
269#endif
270 {
271#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
272 const int bin_id = blockIdx.x;
273 const unsigned int bin_start = offsets_ptr[bin_id];
274 const unsigned int bin_stop = offsets_ptr[bin_id+1];
275
276 if (bin_start == bin_stop) { return; }
277
278 amrex::Box buffer_box;
279 {
280 ParticleReal xp, yp, zp;
281 GetPosition(permutation[bin_start], xp, yp, zp);
282#if defined(WARPX_DIM_3D)
283 IntVect iv = IntVect(int(amrex::Math::floor((xp-plo[0])*dinv.x)),
284 int(amrex::Math::floor((yp-plo[1])*dinv.y)),
285 int(amrex::Math::floor((zp-plo[2])*dinv.z)));
286#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
287 IntVect iv = IntVect(int(amrex::Math::floor((xp-plo[0])*dinv.x)),
288 int(amrex::Math::floor((zp-plo[1])*dinv.z)));
289#elif defined(WARPX_DIM_1D_Z)
290 IntVect iv = IntVect(int(amrex::Math::floor((zp-plo[0])*dinv.z)));
291#elif defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RSPHERE)
292 IntVect iv = IntVect(int(amrex::Math::floor((xp-plo[0])*dinv.x)));
293#endif
294 iv += domain.smallEnd();
295 getTileIndex(iv, box, true, bin_size, buffer_box);
296 }
297
298 Box tbx = convert( buffer_box, ix_type);
299 tbx.grow(depos_order);
300
302 amrex::Real* const shared = gsm.dataPtr();
303
304 amrex::Array4<amrex::Real> buf(shared, amrex::begin(tbx), amrex::end(tbx), 1);
305
306 // Zero-initialize the temporary array in shared memory
307 volatile amrex::Real* vs = shared;
308 for (int i = threadIdx.x; i < tbx.numPts(); i += blockDim.x) {
309 vs[i] = 0.0;
310 }
311 __syncthreads();
312#else
313 amrex::Array4<amrex::Real> const &buf = rho_arr;
314#endif // defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
315
316#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
317 // Loop over macroparticles: each threads loops over particles with a stride of `blockDim.x`
318 for (unsigned int ip_orig = bin_start + threadIdx.x; ip_orig < bin_stop; ip_orig += blockDim.x)
319#endif
320 {
321 const unsigned int ip = permutation[ip_orig];
322
323 // --- Get particle quantities
324 amrex::Real wq = q*wp[ip]*invvol;
325 if (do_ionization){
326 wq *= ion_lev[ip];
327 }
328
329 amrex::ParticleReal xp, yp, zp;
330 GetPosition(ip, xp, yp, zp);
331
332 // --- Compute shape factors
333 Compute_shape_factor< depos_order > const compute_shape_factor;
334#if !defined(WARPX_DIM_1D_Z)
335 // x direction
336 // Get particle position in grid coordinates
337#if defined(WARPX_DIM_RZ)
338 const amrex::Real rp = std::sqrt(xp*xp + yp*yp);
339 amrex::Real costheta;
340 amrex::Real sintheta;
341 if (rp > 0.) {
342 costheta = xp/rp;
343 sintheta = yp/rp;
344 } else {
345 costheta = 1._rt;
346 sintheta = 0._rt;
347 }
348 const Complex xy0 = Complex{costheta, sintheta};
349 const amrex::Real x = (rp - xyzmin.x)*dinv.x;
350#elif defined(WARPX_DIM_RCYLINDER)
351 const amrex::Real rp = std::sqrt(xp*xp + yp*yp);
352 const amrex::Real x = (rp - xyzmin.x)*dinv.x;
353#elif defined(WARPX_DIM_RSPHERE)
354 const amrex::Real rp = std::sqrt(xp*xp + yp*yp + zp*zp);
355 const amrex::Real x = (rp - xyzmin.x)*dinv.x;
356#else
357 const amrex::Real x = (xp - xyzmin.x)*dinv.x;
358#endif
359
360 // Compute shape factor along x
361 // i: leftmost grid point that the particle touches
362 amrex::Real sx[depos_order + 1] = {0._rt};
363 int i = 0;
364 if (rho_type[0] == NODE) {
365 i = compute_shape_factor(sx, x);
366 } else if (rho_type[0] == CELL) {
367 i = compute_shape_factor(sx, x - 0.5_rt);
368 }
369#endif //defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
370#if defined(WARPX_DIM_3D)
371 // y direction
372 const amrex::Real y = (yp - xyzmin.y)*dinv.y;
373 amrex::Real sy[depos_order + 1] = {0._rt};
374 int j = 0;
375 if (rho_type[1] == NODE) {
376 j = compute_shape_factor(sy, y);
377 } else if (rho_type[1] == CELL) {
378 j = compute_shape_factor(sy, y - 0.5_rt);
379 }
380#endif
381#if !defined(WARPX_DIM_RCYLINDER) && !defined(WARPX_DIM_RSPHERE)
382 // z direction
383 const amrex::Real z = (zp - xyzmin.z)*dinv.z;
384 amrex::Real sz[depos_order + 1] = {0._rt};
385 int k = 0;
386 if (rho_type[WARPX_ZINDEX] == NODE) {
387 k = compute_shape_factor(sz, z);
388 } else if (rho_type[WARPX_ZINDEX] == CELL) {
389 k = compute_shape_factor(sz, z - 0.5_rt);
390 }
391#endif
392
393 // Deposit charge into buf
394#if defined(WARPX_DIM_1D_Z)
395 for (int iz=0; iz<=depos_order; iz++){
397 &buf(lo.x+k+iz, 0, 0, 0),
398 sz[iz]*wq);
399 }
400#elif defined(WARPX_DIM_RCYLINDER) || defined(WARPX_DIM_RSPHERE)
401 for (int ix=0; ix<=depos_order; ix++){
403 &buf(lo.x+i+ix, 0, 0, 0),
404 sx[ix]*wq);
405 }
406#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
407 for (int iz=0; iz<=depos_order; iz++){
408 for (int ix=0; ix<=depos_order; ix++){
410 &buf(lo.x+i+ix, lo.y+k+iz, 0, 0),
411 sx[ix]*sz[iz]*wq);
412#if defined(WARPX_DIM_RZ)
413 Complex xy = xy0; // Throughout the following loop, xy takes the value e^{i m theta}
414 for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
415 // The factor 2 on the weighting comes from the normalization of the modes
416 amrex::Gpu::Atomic::AddNoRet( &buf(lo.x+i+ix, lo.y+k+iz, 0, 2*imode-1), 2._rt*sx[ix]*sz[iz]*wq*xy.real());
417 amrex::Gpu::Atomic::AddNoRet( &buf(lo.x+i+ix, lo.y+k+iz, 0, 2*imode ), 2._rt*sx[ix]*sz[iz]*wq*xy.imag());
418 xy = xy*xy0;
419 }
420#endif
421 }
422 }
423#elif defined(WARPX_DIM_3D)
424 for (int iz=0; iz<=depos_order; iz++){
425 for (int iy=0; iy<=depos_order; iy++){
426 for (int ix=0; ix<=depos_order; ix++){
428 &buf(lo.x+i+ix, lo.y+j+iy, lo.z+k+iz),
429 sx[ix]*sy[iy]*sz[iz]*wq);
430 }
431 }
432 }
433#endif
434 }
435
436#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
437 __syncthreads();
438
439 addLocalToGlobal(tbx, rho_arr, buf);
440#endif // defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
441 }
442 );
443}
444
445#endif // WARPX_CHARGEDEPOSITION_H_
#define AMREX_GPU_DEVICE
#define AMREX_D_DECL(a, b, c)
void doChargeDepositionSharedShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const int *ion_lev, amrex::FArrayBox &rho_fab, const amrex::IntVect &ix_type, const long np_to_deposit, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, const amrex::DenseBins< WarpXParticleContainer::ParticleTileType::ParticleTileDataType > &a_bins, const amrex::Box &box, const amrex::Geometry &geom, const amrex::IntVect &a_tbox_max_size, const amrex::IntVect bin_size)
Definition ChargeDeposition.H:196
void doChargeDepositionShapeN(const GetParticlePosition< PIdx > &GetPosition, const amrex::ParticleReal *const wp, const int *ion_lev, amrex::FArrayBox &rho_fab, long np_to_deposit, const amrex::XDim3 &dinv, const amrex::XDim3 &xyzmin, amrex::Dim3 lo, amrex::Real q, int n_rz_azimuthal_modes)
Definition ChargeDeposition.H:37
#define WARPX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition TextMsg.H:13
amrex::GpuComplex< amrex::Real > Complex
Definition WarpX_Complex.H:22
NODE
const Box & box() const noexcept
Array4< T const > array() const noexcept
__host__ __device__ BoxND & grow(int i) noexcept
__host__ __device__ Long numPts() const noexcept
__host__ __device__ IntVectND< dim > type() const noexcept
index_type * offsetsPtr() noexcept
index_type * permutationPtr() noexcept
Long numBins() const noexcept
const Box & Domain() const noexcept
GpuArray< Real, 3 > ProbLoArray() const noexcept
static std::size_t sharedMemPerBlock() noexcept
__host__ __device__ AMREX_FORCE_INLINE void AddNoRet(T *sum, T value) noexcept
gpuStream_t gpuStream() noexcept
__host__ __device__ void ignore_unused(const Ts &...)
__host__ __device__ BoxND< dim > convert(const BoxND< dim > &b, const IntVectND< dim > &typ) noexcept
__host__ __device__ int getTileIndex(const IntVect &iv, const Box &box, const bool a_do_tiling, const IntVect &a_tile_size, Box &tbx)
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
BoxND< 3 > Box
void launch(T const &n, L &&f) noexcept
__host__ __device__ Dim3 begin(BoxND< dim > const &box) noexcept
IntVectND< 3 > IntVect
__host__ __device__ Dim3 end(BoxND< dim > const &box) noexcept
Definition ShapeFactors.H:29
Functor that can be used to extract the positions of the macroparticles inside a ParallelFor kernel.
Definition GetAndSetPosition.H:75
__host__ __device__ constexpr T real() const noexcept
__host__ __device__ constexpr T imag() const noexcept
__device__ T * dataPtr() noexcept