mdds
Loading...
Searching...
No Matches
aos/block_util.hpp
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
3// SPDX-FileCopyrightText: 2021 - 2025 Kohei Yoshida
4//
5// SPDX-License-Identifier: MIT
6
7#pragma once
8
9#include "mdds/global.hpp"
10#include "../types.hpp"
11
12namespace mdds { namespace mtv { namespace aos { namespace detail {
13
14template<typename Blks, lu_factor_t F>
16{
17 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
18 {
19 static_assert(
20 mdds::detail::invalid_static_int<F>, "The loop-unrolling factor must be one of 0, 4, 8, 16, or 32.");
21 }
22};
23
24template<typename Blks>
25struct adjust_block_positions<Blks, lu_factor_t::none>
26{
27 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
28 {
29 int64_t n = blocks.size();
30
31 if (start_block_index >= n)
32 return;
33
34#if MDDS_USE_OPENMP
35#pragma omp parallel for
36#endif
37 for (int64_t i = start_block_index; i < n; ++i)
38 blocks[i].position += delta;
39 }
40};
41
42template<typename Blks>
43struct adjust_block_positions<Blks, lu_factor_t::lu4>
44{
45 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
46 {
47 int64_t n = blocks.size();
48
49 if (start_block_index >= n)
50 return;
51
52 // Ensure that the section length is divisible by 4.
53 int64_t len = n - start_block_index;
54 int64_t rem = len & 3; // % 4
55 len -= rem;
56 len += start_block_index;
57#if MDDS_USE_OPENMP
58#pragma omp parallel for
59#endif
60 for (int64_t i = start_block_index; i < len; i += 4)
61 {
62 blocks[i].position += delta;
63 blocks[i + 1].position += delta;
64 blocks[i + 2].position += delta;
65 blocks[i + 3].position += delta;
66 }
67
68 rem += len;
69 for (int64_t i = len; i < rem; ++i)
70 blocks[i].position += delta;
71 }
72};
73
74template<typename Blks>
75struct adjust_block_positions<Blks, lu_factor_t::lu8>
76{
77 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
78 {
79 int64_t n = blocks.size();
80
81 if (start_block_index >= n)
82 return;
83
84 // Ensure that the section length is divisible by 8.
85 int64_t len = n - start_block_index;
86 int64_t rem = len & 7; // % 8
87 len -= rem;
88 len += start_block_index;
89#if MDDS_USE_OPENMP
90#pragma omp parallel for
91#endif
92 for (int64_t i = start_block_index; i < len; i += 8)
93 {
94 blocks[i].position += delta;
95 blocks[i + 1].position += delta;
96 blocks[i + 2].position += delta;
97 blocks[i + 3].position += delta;
98 blocks[i + 4].position += delta;
99 blocks[i + 5].position += delta;
100 blocks[i + 6].position += delta;
101 blocks[i + 7].position += delta;
102 }
103
104 rem += len;
105 for (int64_t i = len; i < rem; ++i)
106 blocks[i].position += delta;
107 }
108};
109
110template<typename Blks>
111struct adjust_block_positions<Blks, lu_factor_t::lu16>
112{
113 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
114 {
115 int64_t n = blocks.size();
116
117 if (start_block_index >= n)
118 return;
119
120 // Ensure that the section length is divisible by 16.
121 int64_t len = n - start_block_index;
122 int64_t rem = len & 15; // % 16
123 len -= rem;
124 len += start_block_index;
125#if MDDS_USE_OPENMP
126#pragma omp parallel for
127#endif
128 for (int64_t i = start_block_index; i < len; i += 16)
129 {
130 blocks[i].position += delta;
131 blocks[i + 1].position += delta;
132 blocks[i + 2].position += delta;
133 blocks[i + 3].position += delta;
134 blocks[i + 4].position += delta;
135 blocks[i + 5].position += delta;
136 blocks[i + 6].position += delta;
137 blocks[i + 7].position += delta;
138 blocks[i + 8].position += delta;
139 blocks[i + 9].position += delta;
140 blocks[i + 10].position += delta;
141 blocks[i + 11].position += delta;
142 blocks[i + 12].position += delta;
143 blocks[i + 13].position += delta;
144 blocks[i + 14].position += delta;
145 blocks[i + 15].position += delta;
146 }
147
148 rem += len;
149 for (int64_t i = len; i < rem; ++i)
150 blocks[i].position += delta;
151 }
152};
153
154template<typename Blks>
155struct adjust_block_positions<Blks, lu_factor_t::lu32>
156{
157 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
158 {
159 int64_t n = blocks.size();
160
161 if (start_block_index >= n)
162 return;
163
164 // Ensure that the section length is divisible by 32.
165 int64_t len = n - start_block_index;
166 int64_t rem = len & 31; // % 32
167 len -= rem;
168 len += start_block_index;
169#if MDDS_USE_OPENMP
170#pragma omp parallel for
171#endif
172 for (int64_t i = start_block_index; i < len; i += 32)
173 {
174 blocks[i].position += delta;
175 blocks[i + 1].position += delta;
176 blocks[i + 2].position += delta;
177 blocks[i + 3].position += delta;
178 blocks[i + 4].position += delta;
179 blocks[i + 5].position += delta;
180 blocks[i + 6].position += delta;
181 blocks[i + 7].position += delta;
182 blocks[i + 8].position += delta;
183 blocks[i + 9].position += delta;
184 blocks[i + 10].position += delta;
185 blocks[i + 11].position += delta;
186 blocks[i + 12].position += delta;
187 blocks[i + 13].position += delta;
188 blocks[i + 14].position += delta;
189 blocks[i + 15].position += delta;
190 blocks[i + 16].position += delta;
191 blocks[i + 17].position += delta;
192 blocks[i + 18].position += delta;
193 blocks[i + 19].position += delta;
194 blocks[i + 20].position += delta;
195 blocks[i + 21].position += delta;
196 blocks[i + 22].position += delta;
197 blocks[i + 23].position += delta;
198 blocks[i + 24].position += delta;
199 blocks[i + 25].position += delta;
200 blocks[i + 26].position += delta;
201 blocks[i + 27].position += delta;
202 blocks[i + 28].position += delta;
203 blocks[i + 29].position += delta;
204 blocks[i + 30].position += delta;
205 blocks[i + 31].position += delta;
206 }
207
208 rem += len;
209 for (int64_t i = len; i < rem; ++i)
210 blocks[i].position += delta;
211 }
212};
213
214}}}} // namespace mdds::mtv::aos::detail
215
216/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition aos/block_util.hpp:16