Line data Source code
1 : //# FFT2D.cc: implementation of FFT2D
2 : //# Copyright (C) 2016
3 : //# Associated Universities, Inc. Washington DC, USA.
4 : //#
5 : //# This library is free software; you can redistribute it and/or modify it
6 : //# under the terms of the GNU General Public License as published by
7 : //# the Free Software Foundation; either version 2 of the License, or (at your
8 : //# option) any later version.
9 : //#
10 : //# This library is distributed in the hope that it will be useful, but WITHOUT
11 : //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 : //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
13 : //# License for more details.
14 : //#
15 : //# You should have received a copy of the GNU General Public License
16 : //# along with this library; if not, write to the Free Software Foundation,
17 : //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 : //#
19 : //# Correspondence concerning AIPS++ should be adressed as follows:
20 : //# Internet email: casa-feedback@nrao.edu.
21 : //# Postal address: AIPS++ Project Office
22 : //# National Radio Astronomy Observatory
23 : //# 520 Edgemont Road
24 : //# Charlottesville, VA 22903-2475 USA
25 : //#
26 : //#
27 : //# $kgolap$
28 : //DEDICATED TO HONGLIN YE
29 :
30 : #include <casacore/casa/Arrays/ArrayMath.h>
31 : #include <casacore/casa/Arrays/Array.h>
32 : #include <casacore/casa/OS/HostInfo.h>
33 : #include <synthesis/Utilities/FFT2D.h>
34 : #include <casacore/lattices/Lattices/Lattice.h>
35 : #ifdef _OPENMP
36 : #include <omp.h>
37 : #endif
38 : using namespace casacore;
39 : namespace casa { //# NAMESPACE CASA - BEGIN
40 :
41 : // Utility function to try to give as much info as possible - CAS-12604
42 0 : void throw_programmer_error(Long nx_p, Long ny_p, Long x, Long y, const char *file,
43 : int line)
44 : {
45 0 : AipsError exc;
46 0 : ostringstream msg;
47 0 : msg << "Programmer error: called FFT2D with wrong size. In " << file << ":" << line
48 0 : << ", with nx_p: " << nx_p << ", ny_p: " << ny_p << ", x: " << x << ", y: " << y
49 0 : << "\n Stack trace: " << exc.getStackTrace();
50 0 : exc.setMessage(msg.str());
51 0 : throw(exc);
52 0 : }
53 :
54 9678 : FFT2D::FFT2D(Bool useFFTW):planC2C_forw_p(nullptr),planC2C_back_p(nullptr), planR2C_p(nullptr), planC2CD_forw_p(nullptr), planC2CD_back_p(nullptr), useFFTW_p(useFFTW), wsave_p(0), lsav_p(0), nx_p(-1), ny_p(-1) {
55 9678 : if(useFFTW_p){
56 9678 : Int numThreads=HostInfo::numCPUs(true);
57 : #ifdef _OPENMP
58 9678 : numThreads=omp_get_max_threads();
59 : #endif
60 9678 : fftwf_init_threads();
61 9678 : fftwf_plan_with_nthreads(numThreads);
62 : ///For double precision
63 9678 : fftw_init_threads();
64 9678 : fftw_plan_with_nthreads(numThreads);
65 : }
66 :
67 9678 : }
68 9674 : FFT2D::~FFT2D(){
69 9674 : if(useFFTW_p){
70 9674 : if(planC2CD_forw_p)
71 143 : fftw_destroy_plan(planC2CD_forw_p);
72 9674 : if(planC2C_forw_p)
73 589 : fftwf_destroy_plan(planC2C_forw_p);
74 9674 : if(planR2C_p)
75 20 : fftwf_destroy_plan(planR2C_p);
76 9674 : if(planC2CD_back_p)
77 1357 : fftw_destroy_plan(planC2CD_back_p);
78 9674 : if(planC2C_back_p)
79 205 : fftwf_destroy_plan(planC2C_back_p);
80 : ////Have to leak the cleanup part as it is thread unsafe to perform this, see CAS-12486
81 : // fftw_cleanup_threads();
82 : // fftw_cleanup();
83 : // fftwf_cleanup_threads();
84 : // fftwf_cleanup();
85 :
86 9674 : planC2CD_forw_p=nullptr;
87 9674 : planC2C_forw_p=nullptr;
88 9674 : planC2CD_back_p=nullptr;
89 9674 : planC2C_back_p=nullptr;
90 : }
91 9674 : }
92 :
93 5686 : FFT2D& FFT2D::operator=(const FFT2D& other){
94 5686 : if(this != &other){
95 : /*planC2C_forw_p=other.planC2C_forw_p;
96 : planR2C_p=other.planR2C_p;
97 : planC2CD_forw_p=other.planC2CD_forw_p;
98 : planC2C_back_p=other.planC2C_back_p;
99 : planC2CD_back_p=other.planC2CD_back_p;
100 : */
101 5686 : planC2C_forw_p=nullptr;
102 5686 : planR2C_p=nullptr;
103 5686 : planC2CD_forw_p=nullptr;
104 5686 : planC2C_back_p=nullptr;
105 5686 : planC2CD_back_p=nullptr;
106 : //cerr << "copy: planc2cd " << planC2CD_back_p << endl;
107 5686 : useFFTW_p=other.useFFTW_p;
108 5686 : wsave_p.resize(other.wsave_p.size());
109 5686 : wsave_p=other.wsave_p;
110 5686 : lsav_p=other.lsav_p;
111 5686 : nx_p=-1;
112 5686 : ny_p=-1;
113 :
114 : }
115 5686 : return *this;
116 : }
117 :
118 20 : void FFT2D::r2cFFT(Lattice<Complex>& out, Lattice<Float>& in){
119 :
120 20 : IPosition shp=in.shape();
121 20 : if(shp.nelements() <2)
122 0 : throw(AipsError("Lattice has to be 2 dimensional to use FFT2D"));
123 20 : Long x= in.shape()(0);
124 20 : Long y=in.shape()(1);
125 20 : if(out.shape()(0) < (x/2+1))
126 0 : throw(AipsError("out shape has to be x/2+1 in size for real to complex FFT2D"));
127 80 : for(uInt k=1; k < shp.nelements(); ++k){
128 60 : if(shp(k) != out.shape()(k))
129 0 : throw(AipsError("shapes of out lattice does not match in lattice for FFT2D"));
130 : }
131 20 : Long numplanes=shp.product()/x/y;
132 20 : IPosition blc(shp.nelements(), 0);
133 20 : IPosition shape=in.shape();
134 :
135 60 : for (uInt ax=2; ax < shp.nelements(); ++ax)
136 40 : shape(ax)=1;
137 20 : IPosition outshape=shape;
138 20 : outshape(0)=x/2+1;
139 :
140 20 : Array<Complex> arr;
141 20 : Array<Float> arrf;
142 : Bool isRef;
143 : Bool del;
144 : Bool delf;
145 : Complex *scr;
146 : Float *scrf;
147 :
148 :
149 :
150 40 : for (Long n=0; n< numplanes; ++n){
151 20 : isRef=out.getSlice(arr, blc, outshape);
152 20 : scr=arr.getStorage(del);
153 : ///Use this method rather than arrf=in.getSlice(blc,shape)
154 : ///as this may be a reference ..the other is a copy always...
155 : /// can gain 0.8s or so for a 10000x10000 array circa 2016
156 20 : in.getSlice( arrf, blc, shape);
157 20 : scrf=arrf.getStorage(delf);
158 20 : r2cFFT(scr, scrf, x, y);
159 20 : arr.putStorage(scr, del);
160 20 : arrf.putStorage(scrf, delf);
161 :
162 20 : if(!isRef){
163 0 : out.putSlice(arr, blc);
164 :
165 : }
166 : //Now calculate next plane
167 :
168 20 : Bool addNextAx=true;
169 60 : for (uInt ax=2; ax < shp.nelements(); ++ax){
170 40 : if(addNextAx){
171 40 : blc(ax) +=1;
172 40 : addNextAx=false;
173 : }
174 40 : if(blc(ax)> shp(ax)-1){
175 40 : blc(ax)=0;
176 40 : addNextAx=true;
177 : }
178 :
179 : }
180 :
181 : }
182 20 : }
183 20 : void FFT2D::r2cFFT(Complex*& out, Float*& in, Long x, Long y){
184 20 : if(x%2 != 0 || y%2 != 0)
185 0 : throw(AipsError("Programmer error: FFT2D does not deal with odd numbers on x-y plane"));
186 20 : fftShift(in, x, y);
187 20 : doFFT(out, in, x, y);
188 : //fft1_p.plan_r2c(IPosition(2,x,y), in, out);
189 : //fft1_p.r2c(IPosition(2,x,y), in, out);
190 : //flipArray out is of shape x/2+1, y
191 20 : Complex* scr=out;
192 20 : Matrix<Complex> tmpo(x/2+1, y/2);
193 : Bool gool;
194 20 : Complex* tmpptr=tmpo.getStorage(gool);
195 20 : #pragma omp parallel for default(none) firstprivate(x, y, tmpptr, scr)
196 : for (Long jj=0; jj< y/2; ++jj){
197 : for(Long ii=0; ii < (x/2+1); ++ii){
198 : tmpptr[jj*(x/2+1)+ii]=scr[jj*(x/2+1)+ii];
199 : scr[jj*(x/2+1)+ii]=scr[(y/2)*(x/2+1)+jj*(x/2+1)+ii];
200 : }
201 : }
202 20 : #pragma omp parallel for default(none) firstprivate(x, y, tmpptr, scr)
203 : for (Long jj=0; jj< y/2; ++jj){
204 : for(Long ii=0; ii < x/2; ++ii){
205 : scr[(y/2)*(x/2+1)+jj*(x/2+1)+ii]=tmpptr[jj*(x/2+1)+ii];
206 : }
207 : }
208 :
209 20 : }
210 1085 : void FFT2D::c2cFFT(Lattice<Complex>& inout, Bool toFreq){
211 1085 : IPosition shp=inout.shape();
212 1085 : if(shp.nelements() <2)
213 0 : throw(AipsError("Lattice has to be 2 dimensional to use FFT2D"));
214 1085 : Long x= inout.shape()(0);
215 1085 : Long y=inout.shape()(1);
216 1085 : Long numplanes=inout.shape().product()/x/y;
217 1085 : IPosition blc(inout.shape().nelements(), 0);
218 1085 : IPosition shape=inout.shape();
219 3255 : for (uInt ax=2; ax < shp.nelements(); ++ax)
220 2170 : shape(ax)=1;
221 1085 : Array<Complex> arr;
222 : Bool isRef;
223 : Bool del;
224 : Complex *scr;
225 :
226 4735 : for (Long n=0; n< numplanes; ++n){
227 3650 : isRef=inout.getSlice(arr, blc, shape);
228 3650 : scr=arr.getStorage(del);
229 3650 : c2cFFT(scr, x, y, toFreq);
230 3650 : arr.putStorage(scr, del);
231 3650 : if(!isRef)
232 0 : inout.putSlice(arr, blc);
233 : //Now calculate next plane
234 3650 : Bool addNextAx=true;
235 10950 : for (uInt ax=2; ax < shp.nelements(); ++ax){
236 7300 : if(addNextAx){
237 7029 : blc(ax) +=1;
238 7029 : addNextAx=false;
239 : }
240 7300 : if(blc(ax)> shp(ax)-1){
241 4464 : blc(ax)=0;
242 4464 : addNextAx=true;
243 : }
244 :
245 : }
246 : }
247 1085 : }
248 1274 : void FFT2D::c2cFFTInDouble(Lattice<Complex>& inout, Bool toFreq){
249 1274 : IPosition shp=inout.shape();
250 1274 : if(shp.nelements() <2)
251 0 : throw(AipsError("Lattice has to be 2 dimensional to use FFT2D"));
252 1274 : Long x= inout.shape()(0);
253 1274 : Long y=inout.shape()(1);
254 1274 : Long numplanes=inout.shape().product()/x/y;
255 1274 : IPosition blc(inout.shape().nelements(), 0);
256 1274 : IPosition shape=inout.shape();
257 3822 : for (uInt ax=2; ax < shp.nelements(); ++ax)
258 2548 : shape(ax)=1;
259 1274 : Array<Complex> arr;
260 : Bool isRef;
261 : Bool del, delD;
262 : Complex *scr;
263 : DComplex *scrD;
264 1274 : Array<DComplex> arrD(shape);
265 1274 : scrD=arrD.getStorage(delD);
266 2548 : for (Long n=0; n< numplanes; ++n){
267 1274 : isRef=inout.getSlice(arr, blc, shape);
268 1274 : scr=arr.getStorage(del);
269 1274 : complexConvert(scrD, scr, (ooLong)shape(0)*(ooLong)shape(1), False);
270 1274 : c2cFFT(scrD, x, y, toFreq);
271 1274 : complexConvert(scrD, scr, (ooLong)shape(0)*(ooLong)shape(1), True);
272 1274 : arr.putStorage(scr, del);
273 1274 : if(!isRef)
274 0 : inout.putSlice(arr, blc);
275 : //Now calculate next plane
276 1274 : Bool addNextAx=true;
277 3822 : for (uInt ax=2; ax < shp.nelements(); ++ax){
278 2548 : if(addNextAx){
279 2548 : blc(ax) +=1;
280 2548 : addNextAx=false;
281 : }
282 2548 : if(blc(ax)> shp(ax)-1){
283 2548 : blc(ax)=0;
284 2548 : addNextAx=true;
285 : }
286 :
287 : }
288 : }
289 1274 : arrD.putStorage(scrD, delD);
290 1274 : }
291 2384 : void FFT2D::c2cFFT(Lattice<DComplex>& inout, Bool toFreq){
292 2384 : IPosition shp=inout.shape();
293 2384 : if(shp.nelements() <2)
294 0 : throw(AipsError("Lattice has to be 2 dimensional to use FFT2D"));
295 2384 : Long x= inout.shape()(0);
296 2384 : Long y=inout.shape()(1);
297 2384 : Long numplanes=inout.shape().product()/x/y;
298 2384 : IPosition blc(inout.shape().nelements(), 0);
299 2384 : IPosition shape=inout.shape();
300 7152 : for (uInt ax=2; ax < shp.nelements(); ++ax)
301 4768 : shape(ax)=1;
302 2384 : Array<DComplex> arr;
303 : Bool isRef;
304 : Bool del;
305 : DComplex *scr;
306 :
307 11199 : for (Long n=0; n< numplanes; ++n){
308 8815 : isRef=inout.getSlice(arr, blc, shape);
309 8815 : scr=arr.getStorage(del);
310 8815 : c2cFFT(scr, x, y, toFreq);
311 8815 : arr.putStorage(scr, del);
312 8815 : if(!isRef)
313 0 : inout.putSlice(arr, blc);
314 : //Now calculate next plane
315 8815 : Bool addNextAx=true;
316 26445 : for (uInt ax=2; ax < shp.nelements(); ++ax){
317 17630 : if(addNextAx){
318 16838 : blc(ax) +=1;
319 16838 : addNextAx=false;
320 : }
321 17630 : if(blc(ax)> shp(ax)-1){
322 10407 : blc(ax)=0;
323 10407 : addNextAx=true;
324 : }
325 :
326 : }
327 : }
328 2384 : }
329 :
330 3650 : void FFT2D::c2cFFT(Complex*& out, Long x, Long y, Bool toFreq){
331 3650 : if(x%2 != 0 || y%2 !=0)
332 0 : throw(AipsError("Programmer error: FFT2D does not deal with odd numbers on x-y plane"));
333 3650 : fftShift(out, x, y, true);
334 3650 : doFFT(out, x, y, toFreq);
335 : /*Int dim[2]={Int(x), Int(y)};
336 : if(toFreq){
337 :
338 : planC2C_p=fftwf_plan_dft(2, dim, reinterpret_cast<fftwf_complex *>(out), reinterpret_cast<fftwf_complex *>(out), FFTW_FORWARD, FFTW_ESTIMATE);
339 :
340 : //fft1_p.plan_c2c_forward(IPosition(2, x, y), out);
341 : }
342 : else{
343 : planC2C_p=fftwf_plan_dft(2, dim, reinterpret_cast<fftwf_complex *>(out), reinterpret_cast<fftwf_complex *>(out), FFTW_BACKWARD, FFTW_ESTIMATE);
344 : // fft1_p.plan_c2c_backward(IPosition(2, x, y), out);
345 : }
346 : fftwf_execute(planC2C_p);
347 : */
348 3650 : fftShift(out, x, y, toFreq);
349 :
350 3650 : }
351 10089 : void FFT2D::c2cFFT(DComplex*& out, Long x, Long y, Bool toFreq){
352 10089 : if(x%2 != 0 || y%2 !=0)
353 0 : throw(AipsError("Programmer error: FFT2D does not deal with odd numbers on x-y plane"));
354 10089 : fftShift(out, x, y, true);
355 10089 : doFFT(out, x, y, toFreq);
356 10089 : fftShift(out, x, y, toFreq);
357 :
358 10089 : }
359 10089 : void FFT2D::doFFT(DComplex*& out, Long x, Long y, Bool toFreq){
360 10089 : if(useFFTW_p){
361 : //Will need to seperate the plan from the execute if we want to run this in multiple threads
362 10089 : Int dim[2]={Int(y), Int(x)};
363 10089 : if(toFreq){
364 1274 : if(!planC2CD_forw_p){
365 145 : planC2CD_forw_p=fftw_plan_dft(2, dim, reinterpret_cast<fftw_complex *>(out), reinterpret_cast<fftw_complex *>(out), FFTW_FORWARD, FFTW_ESTIMATE);
366 145 : fftw_execute(planC2CD_forw_p);
367 145 : nx_p=x;
368 145 : ny_p=y;
369 : }
370 : else{
371 1129 : if((nx_p != x) || (ny_p !=y)) {
372 0 : throw_programmer_error(nx_p, ny_p, x, y, __FILE__, __LINE__);
373 : }
374 1129 : fftw_execute_dft(planC2CD_forw_p, reinterpret_cast<fftw_complex *>(out), reinterpret_cast<fftw_complex *>(out));
375 : }
376 : //fft1_p.plan_c2c_forward(IPosition(2, x, y), out);
377 : }
378 : else{
379 8815 : if(!planC2CD_back_p){
380 1357 : planC2CD_back_p=fftw_plan_dft(2, dim, reinterpret_cast<fftw_complex *>(out), reinterpret_cast<fftw_complex *>(out), FFTW_BACKWARD, FFTW_ESTIMATE);
381 1357 : fftw_execute(planC2CD_back_p);
382 1357 : nx_p=x;
383 1357 : ny_p=y;
384 : }
385 : else{
386 7458 : if((nx_p != x) || (ny_p !=y)) {
387 0 : throw_programmer_error(nx_p, ny_p, x, y, __FILE__, __LINE__);
388 : }
389 7458 : fftw_execute_dft(planC2CD_back_p, reinterpret_cast<fftw_complex *>(out), reinterpret_cast<fftw_complex *>(out));
390 : }
391 : // fft1_p.plan_c2c_backward(IPosition(2, x, y), out);
392 : }
393 : }
394 : else{
395 0 : throw(AipsError("Double precision FFT with FFTPack is not implemented"));
396 : }
397 10089 : }
398 3650 : void FFT2D::doFFT(Complex*& out, Long x, Long y, Bool toFreq){
399 3650 : if(useFFTW_p){
400 : //Will need to seperate the plan from the execute if we want to run this in multiple threads
401 3650 : Int dim[2]={Int(y), Int(x)};
402 3650 : if(toFreq){
403 3187 : if(!planC2C_forw_p){
404 591 : planC2C_forw_p=fftwf_plan_dft(2, dim, reinterpret_cast<fftwf_complex *>(out), reinterpret_cast<fftwf_complex *>(out), FFTW_FORWARD, FFTW_ESTIMATE);
405 591 : fftwf_execute(planC2C_forw_p);
406 591 : nx_p=x;
407 591 : ny_p=y;
408 :
409 : }
410 : else{
411 2596 : if((nx_p != x) || (ny_p !=y)) {
412 0 : throw_programmer_error(nx_p, ny_p, x, y, __FILE__, __LINE__);
413 : }
414 2596 : fftwf_execute_dft(planC2C_forw_p, reinterpret_cast<fftwf_complex *>(out), reinterpret_cast<fftwf_complex *>(out) );
415 : }
416 : //fft1_p.plan_c2c_forward(IPosition(2, x, y), out);
417 : }
418 : else{
419 463 : if(!planC2C_back_p){
420 207 : planC2C_back_p=fftwf_plan_dft(2, dim, reinterpret_cast<fftwf_complex *>(out), reinterpret_cast<fftwf_complex *>(out), FFTW_BACKWARD, FFTW_ESTIMATE);
421 207 : fftwf_execute(planC2C_back_p);
422 207 : nx_p=x;
423 207 : ny_p=y;
424 : }
425 : else{
426 256 : if((nx_p != x) || (ny_p !=y)) {
427 0 : throw_programmer_error(nx_p, ny_p, x, y, __FILE__, __LINE__);
428 : }
429 256 : fftwf_execute_dft(planC2C_back_p, reinterpret_cast<fftwf_complex *>(out), reinterpret_cast<fftwf_complex *>(out) );
430 : }
431 : // fft1_p.plan_c2c_backward(IPosition(2, x, y), out);
432 : }
433 :
434 :
435 : }
436 : else{
437 : Int ier;
438 0 : Int x1=Int(x);
439 0 : Int y1=Int(y);
440 0 : if(wsave_p.size()==0){
441 0 : wsave_p.resize(2*x1*y1+15);
442 0 : lsav_p=2*x1*y1+15;
443 0 : Float *wsaveptr=wsave_p.data();
444 0 : FFTPack::cfft2i(x1, y1, wsaveptr, lsav_p, ier);
445 : }
446 0 : std::vector<Float> work(2*x1*y1);
447 0 : Int lenwrk=2*x1*y1;
448 0 : Float* workptr=work.data();
449 0 : Float* wsaveptr=wsave_p.data();
450 0 : if(toFreq)
451 0 : FFTPack::cfft2f(x1, x1, y1, out, wsaveptr, lsav_p, workptr, lenwrk, ier);
452 : else
453 0 : FFTPack::cfft2b(x1, x1, y1, out, wsaveptr, lsav_p, workptr, lenwrk, ier);
454 0 : }
455 3650 : }
456 20 : void FFT2D::doFFT(Complex*& out, Float*& in, Long x, Long y){
457 20 : if(useFFTW_p){
458 20 : Int dim[2]={Int(y), Int(x)};
459 20 : if(!planR2C_p){
460 20 : planR2C_p=fftwf_plan_dft_r2c(2, dim, in, reinterpret_cast<fftwf_complex *>(out), FFTW_ESTIMATE);
461 :
462 : //fft1_p.plan_c2c_forward(IPosition(2, x, y), out);
463 :
464 20 : fftwf_execute(planR2C_p);
465 20 : nx_p=x;
466 20 : ny_p=y;
467 : }
468 : else{
469 0 : if((nx_p != x) || (ny_p !=y)) {
470 0 : throw_programmer_error(nx_p, ny_p, x, y, __FILE__, __LINE__);
471 : }
472 0 : fftwf_execute_dft_r2c(planR2C_p, in, reinterpret_cast<fftwf_complex *>(out));
473 : }
474 : }
475 : else{
476 : /*
477 : Int ier;
478 : Int x1=Int(x);
479 : Int y1=Int(y);
480 : if(wsave_p.size()==0){
481 : wsave_p.resize(2*x1*y1+15);
482 : lsav_p=2*x1*y1+15;
483 : Float *wsaveptr=wsave_p.data();
484 : FFTPack::cfft2i(x1, y1, wsaveptr, lsav_p, ier);
485 : }
486 : std::vector<Float> work(2*x1*y1);
487 : Int lenwrk=2*x1*y1;
488 : Float* workptr=work.data();
489 : Float* wsaveptr=wsave_p.data();
490 : if(toFreq)
491 : FFTPack::cfft2f(x1, y1, x1, out, wsaveptr, lsav_p, workptr, lenwrk, ier);
492 : else
493 : FFTPack::cfft2b(x1, y1, x1, out, wsaveptr, lsav_p, workptr, lenwrk, ier);
494 : */
495 0 : throw(AipsError("Not implemented FFTPack r2c yet"));
496 : }
497 20 : }
498 :
499 7300 : void FFT2D::fftShift(Complex*& s, Long x, Long y, Bool toFreq){
500 : ////Lets try our own flip
501 :
502 : Bool gool;
503 7300 : Complex* scr=s;
504 : {
505 7300 : Matrix<Complex> tmpo(x/2, y/2);
506 7300 : Complex* tmpptr=tmpo.getStorage(gool);
507 : ////TEST
508 : //omp_set_num_threads(1);
509 : /////
510 : /*
511 : #pragma omp parallel for default(none) firstprivate(x, y, tmpptr, scr)
512 : for (Long jj=0; jj< y/2; ++jj){
513 : for(Long ii=0; ii < x/2; ++ii){
514 : tmpptr[jj*x/2+ii]=scr[(y/2-jj-1)*x+(x/2-ii-1)];
515 : scr[(y/2)*x+(jj*x+x/2)+ii]=scr[jj*x+ii];
516 : }
517 : }
518 : */
519 7300 : Float divid=1.0f;
520 7300 : if(!toFreq)
521 463 : divid=1.0f/(Float(x)*Float(y));
522 7300 : #pragma omp parallel for default(none) firstprivate(x, y, tmpptr, scr, divid)
523 : for (Long jj=0; jj< y/2; ++jj){
524 : for(Long ii=0; ii < x/2; ++ii){
525 : tmpptr[jj*x/2+ii]=scr[(y/2)*x+(jj*x+x/2)+ii]*divid;
526 : scr[(y/2)*x+(jj*x+x/2)+ii]=scr[jj*x+ii]*divid;
527 : }
528 : }
529 7300 : #pragma omp parallel for default(none) firstprivate(x,y, tmpptr, scr)
530 : for (Long jj=0; jj< y/2; ++jj){
531 : for(Long ii=0; ii < x/2; ++ii){
532 : scr[jj*x+ii]=tmpptr[jj*x/2+ii];
533 : }
534 : }
535 7300 : #pragma omp parallel for default(none) firstprivate(x,y, tmpptr, scr, divid)
536 : for (Long jj=0; jj< y/2; ++jj){
537 : for(Long ii=0; ii < x/2; ++ii){
538 : tmpptr[jj*x/2+ii]=scr[(jj*x+x/2)+ii]*divid;
539 : scr[(jj*x+x/2)+ii]=scr[(y/2)*x+jj*x+ii]*divid;
540 : }
541 : }
542 7300 : #pragma omp parallel for default(none) firstprivate(x, y, tmpptr, scr)
543 : for (Long jj=0; jj< y/2; ++jj){
544 : for(Long ii=0; ii < x/2; ++ii){
545 : scr[(y/2)*x+jj*x+ii]=tmpptr[jj*x/2+ii];
546 : }
547 : }
548 7300 : tmpo.putStorage(tmpptr, gool);
549 7300 : }
550 :
551 : ////
552 :
553 : //if(rot)
554 : /*{
555 :
556 : Matrix<Complex> tmpo(x, y/2);
557 : Complex* tmpptr=tmpo.getStorage(gool);
558 : for (Long jj=0; jj< y/2; ++jj){
559 : for(Long ii=0; ii < x; ++ii){
560 : tmpptr[jj*x+ii]=scr[(y-jj-1)*x+(x-ii-1)];
561 : scr[(y-jj-1)*x+(x-ii-1)]=scr[jj*x+ii];
562 : }
563 : }
564 : for (Long jj=0; jj< y/2; ++jj){
565 : for(Long ii=0; ii < x; ++ii){
566 : scr[jj*x+ii]= tmpptr[jj*x+ii];
567 : }
568 : }
569 : }*/
570 :
571 :
572 :
573 :
574 7300 : }
575 :
576 20178 : void FFT2D::fftShift(DComplex*& s, Long x, Long y, Bool toFreq){
577 : ////Lets try our own flip
578 :
579 : Bool gool;
580 20178 : DComplex* scr=s;
581 : {
582 20178 : Matrix<DComplex> tmpo(x/2, y/2);
583 20178 : DComplex* tmpptr=tmpo.getStorage(gool);
584 : ////TEST
585 : //omp_set_num_threads(1);
586 : /////
587 : /*
588 : #pragma omp parallel for default(none) firstprivate(x, y, tmpptr, scr)
589 : for (Long jj=0; jj< y/2; ++jj){
590 : for(Long ii=0; ii < x/2; ++ii){
591 : tmpptr[jj*x/2+ii]=scr[(y/2-jj-1)*x+(x/2-ii-1)];
592 : scr[(y/2)*x+(jj*x+x/2)+ii]=scr[jj*x+ii];
593 : }
594 : }
595 : */
596 20178 : Double divid=1.0f;
597 20178 : if(!toFreq)
598 8815 : divid=1.0f/(Double(x)*Double(y));
599 20178 : #pragma omp parallel for default(none) firstprivate(x, y, tmpptr, scr, divid)
600 : for (Long jj=0; jj< y/2; ++jj){
601 : for(Long ii=0; ii < x/2; ++ii){
602 : tmpptr[jj*x/2+ii]=scr[(y/2)*x+(jj*x+x/2)+ii]*divid;
603 : scr[(y/2)*x+(jj*x+x/2)+ii]=scr[jj*x+ii]*divid;
604 : }
605 : }
606 20178 : #pragma omp parallel for default(none) firstprivate(x,y, tmpptr, scr)
607 : for (Long jj=0; jj< y/2; ++jj){
608 : for(Long ii=0; ii < x/2; ++ii){
609 : scr[jj*x+ii]=tmpptr[jj*x/2+ii];
610 : }
611 : }
612 20178 : #pragma omp parallel for default(none) firstprivate(x,y, tmpptr, scr, divid)
613 : for (Long jj=0; jj< y/2; ++jj){
614 : for(Long ii=0; ii < x/2; ++ii){
615 : tmpptr[jj*x/2+ii]=scr[(jj*x+x/2)+ii]*divid;
616 : scr[(jj*x+x/2)+ii]=scr[(y/2)*x+jj*x+ii]*divid;
617 : }
618 : }
619 20178 : #pragma omp parallel for default(none) firstprivate(x, y, tmpptr, scr)
620 : for (Long jj=0; jj< y/2; ++jj){
621 : for(Long ii=0; ii < x/2; ++ii){
622 : scr[(y/2)*x+jj*x+ii]=tmpptr[jj*x/2+ii];
623 : }
624 : }
625 20178 : tmpo.putStorage(tmpptr, gool);
626 20178 : }
627 :
628 : ////
629 :
630 : //if(rot)
631 : /*{
632 :
633 : Matrix<Complex> tmpo(x, y/2);
634 : Complex* tmpptr=tmpo.getStorage(gool);
635 : for (Long jj=0; jj< y/2; ++jj){
636 : for(Long ii=0; ii < x; ++ii){
637 : tmpptr[jj*x+ii]=scr[(y-jj-1)*x+(x-ii-1)];
638 : scr[(y-jj-1)*x+(x-ii-1)]=scr[jj*x+ii];
639 : }
640 : }
641 : for (Long jj=0; jj< y/2; ++jj){
642 : for(Long ii=0; ii < x; ++ii){
643 : scr[jj*x+ii]= tmpptr[jj*x+ii];
644 : }
645 : }
646 : }*/
647 :
648 :
649 :
650 :
651 20178 : }
652 20 : void FFT2D::fftShift(Float*& s, Long x, Long y){
653 : ////Lets try our own flip
654 :
655 : Bool gool;
656 20 : Float* scr=s;
657 20 : Matrix<Float> tmpo(x/2, y/2);
658 20 : Float* tmpptr=tmpo.getStorage(gool);
659 : ////TEST
660 : //omp_set_num_threads(1);
661 : /////
662 20 : #pragma omp parallel for default(none) firstprivate(x, y, tmpptr, scr)
663 : for (Long jj=0; jj< y/2; ++jj){
664 : for(Long ii=0; ii < x/2; ++ii){
665 : tmpptr[jj*x/2+ii]=scr[(y/2)*x+(jj*x+x/2)+ii];
666 : scr[(y/2)*x+(jj*x+x/2)+ii]=scr[jj*x+ii];
667 : }
668 : }
669 20 : #pragma omp parallel for default(none) firstprivate(x, y, tmpptr, scr)
670 : for (Long jj=0; jj< y/2; ++jj){
671 : for(Long ii=0; ii < x/2; ++ii){
672 : scr[jj*x+ii]=tmpptr[jj*x/2+ii];
673 : }
674 : }
675 20 : #pragma omp parallel for default(none) firstprivate(x, y, tmpptr, scr)
676 : for (Long jj=0; jj< y/2; ++jj){
677 : for(Long ii=0; ii < x/2; ++ii){
678 : tmpptr[jj*x/2+ii]=scr[(jj*x+x/2)+ii];
679 : scr[(jj*x+x/2)+ii]=scr[(y/2)*x+jj*x+ii];
680 : }
681 : }
682 20 : #pragma omp parallel for default(none) firstprivate(x, y, tmpptr, scr)
683 : for (Long jj=0; jj< y/2; ++jj){
684 : for(Long ii=0; ii < x/2; ++ii){
685 : scr[(y/2)*x+jj*x+ii]=tmpptr[jj*x/2+ii];
686 : }
687 : }
688 :
689 :
690 :
691 20 : }
692 2548 : void FFT2D::complexConvert(DComplex*& scrD, Complex*& scr, const ooLong len, const Bool down){
693 2548 : if(down){
694 901204730 : for(ooLong k=0; k< len; ++k){
695 901203456 : scr[k]=(Complex)(scrD[k]);
696 : }
697 : }
698 : else{
699 901204730 : for(ooLong k=0; k< len; ++k){
700 901203456 : scrD[k]=(DComplex)(scr[k]);
701 : }
702 : }
703 2548 : }
704 637 : std::tuple<Long, Long> FFT2D::getShape() {
705 :
706 637 : return std::make_tuple(nx_p, ny_p);
707 : }
708 :
709 :
710 : } //# NAMESPACE CASA - END
|