Line data Source code
1 : //# --------------------------------------------------------------------
2 : //# LineFindingUtils.h: this defines utility functions of line finding
3 : //# --------------------------------------------------------------------
4 : //# Copyright (C) 2015
5 : //# National Astronomical Observatory of Japan
6 : //#
7 : //# This library is free software; you can redistribute it and/or modify it
8 : //# under the terms of the GNU Library General Public License as published by
9 : //# the Free Software Foundation; either version 2 of the License, or (at your
10 : //# option) any later version.
11 : //#
12 : //# This library is distributed in the hope that it will be useful, but WITHOUT
13 : //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
15 : //# License for more details.
16 : //#
17 : //# You should have received a copy of the GNU Library General Public License
18 : //# along with this library; if not, write to the Free Software Foundation,
19 : //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
20 : //#
21 : //# Correspondence concerning AIPS++ should be addressed as follows:
22 : //# Internet email: casa-feedback@nrao.edu.
23 : //# Postal address: AIPS++ Project Office
24 : //# National Radio Astronomy Observatory
25 : //# 520 Edgemont Road
26 : //# Charlottesville, VA 22903-2475 USA
27 : //#
28 : //# $Id$
29 : #ifndef _CASA_LINE_FINDING_UTILS_H_
30 : #define _CASA_LINE_FINDING_UTILS_H_
31 :
32 : #include <iostream>
33 : #include <string>
34 : #include <list>
35 : #include <sstream>
36 :
37 : #include <casacore/casa/aipstype.h>
38 : #include <casacore/casa/Arrays/Vector.h>
39 :
40 : namespace casa { //# NAMESPACE CASA - BEGIN
41 :
42 : class LineFinderUtils {
43 : public:
44 : /*
45 : * Bin data and mask arrays.
46 : *
47 : * @param[in] num_in : the number of elements in in_data and in_mask
48 : * @param[in] in_data : an input data array to bin
49 : * @param[in] in_mask : an input mask array to bin
50 : * @param[in] bin_size : the number of channels to bin
51 : * @param[in] num_out : the number of elements in out_data and out_mask
52 : * @param[out] out_data : an array to store binned data
53 : * @param[out] out_mask : an array to store binned mask
54 : * @param[in] offset : the channel index in in_data and in_mask
55 : * to start binning. offset should be 0 if keepsize=false.
56 : * @param[in] keepsize : if true, keep array size after binning.
57 : * otherwise, the output arrays will have (num_in-offset+1)/bin_size channel.
58 : * num_out should be equal to num_in when keepsize=true.
59 : */
60 : template<typename DataType>
61 : static size_t binDataAndMask(size_t const num_in,
62 : DataType const in_data[/*num_in*/], bool const in_mask[/*num_in*/],
63 : size_t const bin_size, size_t const num_out,
64 : DataType out_data[/*num_out*/], bool out_mask[/*num_out*/],
65 : size_t const offset = 0, bool const keepsize = false);
66 :
67 : /*
68 : * Calculate median absolute deviation (MAD) of a data array
69 : * Processes mad[i] = data[i] - (median of valid elements in data)
70 : *
71 : * @param[in] num_data : the number of elements in @a in_data ,
72 : * @a in_mask , and @a mad.
73 : * @param[in] in_data : data array to calculate MAD
74 : * @param[in] in_mask : a boolean mask array. the elements
75 : * with mask=false will be ignored in calculating median.
76 : * @param[out] mad : an array of MAD values
77 : */
78 : static void calculateMAD(size_t const num_data,
79 : float const in_data[/*num_data*/], bool const in_mask[/*num_data*/],
80 : float mad[/*num_data*/]);
81 :
82 : /*
83 : * Count the number of true elements in a boolean array.
84 : *
85 : * @param[in] num_data : the number of elements in @a data
86 : * @param[in] data : a boolean array
87 : * @return the number of elements with true in @a data
88 : */
89 3504 : inline static size_t countTrue(size_t num_data,
90 : bool const data[/*num_data*/]) {
91 3504 : size_t ntrue = 0;
92 : static_assert(static_cast<uint8_t>(true)==1, "cast of bool failed");
93 : static_assert(static_cast<uint8_t>(false)==0, "cast of bool failed");
94 3504 : uint8_t const *data8 = reinterpret_cast<uint8_t const *>(data);
95 : static_assert(sizeof(data[0])==sizeof(data8[0]),
96 : "bool and uint8_t has different size");
97 12742640 : for (size_t i = 0; i < num_data; ++i) {
98 12739136 : ntrue += data8[i];
99 : }
100 3504 : return ntrue;
101 : }
102 : ;
103 :
104 : /*
105 : * create mask by a threshold value
106 : * out_mask[i] = in_mask[i] && (in_data[i] >= threshold)
107 : *
108 : * @param[in] num_data : the number of elements in @a in_data ,
109 : * @a in_mask , and @a out_mask
110 : * @param[in] in_data : a data array
111 : * @param[in] in_mask : a boolean mask array
112 : * @param[in] threshold : a threshold value to compare with @a in_data
113 : * @param[out] out_mask : an output mask array
114 : */
115 : static void createMaskByAThreshold(size_t const num_data,
116 : float const in_data[/*num_data*/], bool const in_mask[/*num_data*/],
117 : float const threshold, bool out_mask[/*num_data*/]);
118 : /*
119 : create mask by threshold value array
120 : out_mask[i] = in_mask[i] && (in_data[i] >= threshold_array[i])
121 : */
122 : /* template <typename DataType> */
123 : /* static void createMaskByThresholds(size_t const num_data, */
124 : /* DataType const in_data[/\*num_data*\/], */
125 : /* bool const in_mask[/\*num_data*\/], */
126 : /* DataType const threshold_array[/\*num_data*\/], */
127 : /* bool out_mask[/\*num_data*\/]); */
128 :
129 : /*
130 : * create sign array by a threshold value
131 : * sign[i] = +1 (in_data[i] > threshold)
132 : * = 0 (in_data[i] == threshold)
133 : * = -1 (in_data[i] < threshold)
134 : *
135 : * @param[in] num_data : the number of elements of @a in_data and @a sign
136 : * @param[in] in_data : an input data array to calculate sign
137 : * @param[in] threshold : a threshold to calculate sign
138 : * @param[out] sign : an output array
139 : */
140 : template<typename DataType>
141 24690 : inline static void createSignByAThreshold(size_t const num_data,
142 : DataType const in_data[/*num_data*/], DataType const threshold,
143 : int8_t sign[/*num_data*/]) {
144 91320537 : for (size_t i = 0; i < num_data; ++i) {
145 91295847 : sign[i] = signCompare(in_data[i], threshold);
146 : }
147 24690 : }
148 : ;
149 :
150 : /*
151 : * create sign array by data and threshold arrays
152 : * sign[i] = +1 (in_data[i] > threshold_array[i])
153 : * = 0 (in_data[i] == threshold_array[i])
154 : * = -1 (in_data[i] < threshold_array[i])
155 : *
156 : * @param[in] num_data : the number of elements of @a in_data ,
157 : * @a threshod_array, and @a sign
158 : * @param[in] in_data : an input data array to calculate sign
159 : * @param[in] threshold_array : a threshold array to calculate sign
160 : * @param[out] sign : an output array
161 : */
162 : template<typename DataType>
163 : inline static void createSignByThresholds(size_t const num_data,
164 : DataType const in_data[/*num_data*/],
165 : DataType const threshold_array[/*num_data*/],
166 : int8_t sign[/*num_data*/]) {
167 : for (size_t i = 0; i < num_data; ++i) {
168 : sign[i] = signCompare(in_data[i], threshold_array[i]);
169 : }
170 : }
171 : ;
172 :
173 : /*
174 : * Convert channel idxs of line ranges of binned array to the ones before binning
175 : *
176 : * @param[in] bin_size : the number of channels binned
177 : * @param[in] offset : the offset channel idx
178 : * @param[in,out] range_list : the list of line channel ranges to convert
179 : */
180 : static void deBinRanges(size_t const bin_size, size_t const offset,
181 : std::list<std::pair<size_t, size_t>>& range_list);
182 :
183 : /*
184 : * Extend for line wing
185 : * Line ranges are extended while sign has the same value as the range
186 : * AND mask=true.
187 : *
188 : * @param[in] num_sign : the number of elements in sign and mask array
189 : * @param[in] sign : an array with the values either -1, 0, or 1.
190 : * It indicates how far a line could be extended.
191 : * @param[in] mask : a boolean mask. The line range will be truncated
192 : * at the channel, mask=false.
193 : * @param[in,out] range_list : a list of line channel ranges
194 : */
195 : static void extendRangeBySign(size_t num_sign,
196 : int8_t const sign[/*num_sign*/], bool const mask[/*num_sign*/],
197 : std::list<std::pair<size_t, size_t>>& range_list);
198 :
199 : /*
200 : * Get median of an sorted array.
201 : * @param[in] num_data: the number of elements from which median is calculated
202 : * @param[in] data: values should be sorted in ascending order.
203 : * @param[in] Must neighther have Inf nor Nan in elements 0-num_data-1.
204 : *
205 : * When the number of elements is odd the function returns data[num_data/2].
206 : * Otherwise, average of middle two elements, i.e, (data[num_data/2]+data[num_data/2-1])/2
207 : */
208 : template<typename DataType>
209 57225 : inline static DataType getMedianOfSorted(size_t const num_data,
210 : DataType const data[/*num_data*/]) {
211 57225 : return (data[num_data / 2] + data[num_data / 2 - ((num_data + 1) % 2)])
212 57225 : / static_cast<DataType>(2);
213 : }
214 : ;
215 :
216 : /*
217 : * Get median of an array taking mask into account.
218 : *
219 : * @param[in] num_data : the number of elements in data and mask arrays
220 : * @param[in] data : data array to calculate median from
221 : * @param[in] mask : mask array. Only elements with mask[i]=true is
222 : * taken into account to calculate median
223 : * @param[in] fraction : a fraction of valid channels to calculate median
224 : * lower data are used.
225 : */
226 : static float maskedMedian(size_t num_data, float const* data,
227 : bool const mask[/*num_data*/], float fraction = 1.0);
228 :
229 : /*
230 : * Convert boolean channel mask to channel index ranges.
231 : * For example:
232 : * mask = {F, T, T, F, T} -> [[1,2], [4,4]]
233 : * @param[in] num_mask : the number of elements in mask
234 : * @param[in] mask : a boolean array of mask
235 : * @param[out] out_range : a list of line channel range
236 : */
237 : static void maskToRangesList(size_t const num_mask,
238 : bool const mask[/*num_mask*/],
239 : std::list<std::pair<size_t, size_t>>& out_range);
240 :
241 : /*
242 : * Merge line ranges if the ranges are separated only by flagged
243 : * channels, i.e., mask=false.
244 : *
245 : * @param[in] num_mask : the number of elements in mask
246 : * @param[in] mask : a boolean array to store mask
247 : * @param[in] maxgap : the maximum separation of line channels to merge
248 : * @param[in,out] range_list : a list of line channel ranges
249 : *
250 : */
251 : static void mergeGapByFalse(size_t const num_mask,
252 : bool const mask[/*num_mask*/], size_t const maxgap,
253 : std::list<std::pair<size_t, size_t>>& range_list);
254 :
255 : /*
256 : * Merge line ranges if the ranges are overlapped.
257 : *
258 : * @param[in] range_list : a list of line range to modify
259 : * The list should be sorted in ascending order of idx
260 : * for both for first and second elements of pairs.
261 : */
262 : static void mergeOverlappingRanges(
263 : std::list<std::pair<size_t, size_t>>& range_list);
264 :
265 : /*
266 : * Merge line ranges if the ranges two lists are overlapped.
267 : *
268 : * @param[in,out] to : a list of line range to merge
269 : * @param[in] from : a list of line range to merge
270 : * Both lists should be sorted in ascending order of idx
271 : * for both for first and second elements of pairs.
272 : */
273 : static void mergeOverlapInTwoLists(std::list<std::pair<size_t, size_t>>& to,
274 : std::list<std::pair<size_t, size_t>>& from);
275 :
276 : /*
277 : * Merge line ranges if the gap between lines are smaller than
278 : * a certain fraction of narrower line.
279 : * @param[in] fraction : the fraction to narrower line
280 : * @param[in] maxwidth :
281 : * @param[in] range_list : a list of line range to modify
282 : * The list should be sorted in ascending order of idx
283 : * for both for first and second elements of pairs.
284 : */
285 : static void mergeSmallGapByFraction(double const fraction,
286 : size_t const maxwidth,
287 : std::list<std::pair<size_t, size_t>>& range_list);
288 :
289 : /*
290 : * Remove line ranges larger than maxwidth from the list.
291 : *
292 : * @param[in] maxwidth : maximum line width allowed
293 : * @param[in] range_list : a list of line range to test
294 : */
295 : static void rejectWideRange(size_t const maxwidth,
296 : std::list<std::pair<size_t, size_t>>& range_list);
297 :
298 : /*
299 : * Remove line ranges smaller than minwidth from the list.
300 : *
301 : * @param[in] minwidth : minimum line width allowed
302 : * @param[in] range_list : a list of line range to test
303 : */
304 : static void rejectNarrowRange(size_t const minwidth,
305 : std::list<std::pair<size_t, size_t>>& range_list);
306 : /* template <typename DataType> */
307 : /* static void splitLines(DataType const& in_data, bool const& in_mask, */
308 : /* std::list<std::pair<size_t,size_t>> const& in_range, */
309 : /* std::list<std::pair<size_t,size_t>>& out_range); */
310 :
311 : inline static std::string FormatLineString(std::list<std::pair<size_t,size_t>> &line_list) {
312 : std::ostringstream oss;
313 : oss << "number of Lines = " << line_list.size() << std::endl;
314 : for (std::list<std::pair<size_t,size_t>>::iterator iter = line_list.begin();
315 : iter!=line_list.end(); ++iter) {
316 : oss << "- [ " << (*iter).first << ", " << (*iter).second << " ] (width: "
317 : << (*iter).second-(*iter).first+1 << ")" << std::endl;
318 : }
319 : return oss.str();
320 : };
321 :
322 : private:
323 : /*
324 : * Merge a line range to a list of line range taking into account the overlap.
325 : */
326 : static size_t mergeARangeToList(
327 : std::list<std::pair<size_t, size_t>>& range_list,
328 : std::pair<size_t, size_t>& new_range, size_t const cursor = 0);
329 :
330 : /*
331 : * Compare data and threshold and returns
332 : * 1 if data > threshold
333 : * 0 if data == threshold
334 : * -1 if data < threshold
335 : */
336 : template<typename DataType>
337 91295847 : inline static int8_t signCompare(DataType const& data,
338 : DataType const& threshold) {
339 91295847 : if (data > threshold)
340 69286795 : return 1;
341 22009052 : else if (data < threshold)
342 21998718 : return -1;
343 : else
344 10334 : return 0;
345 : }
346 : ;
347 :
348 : };
349 :
350 : } //# NAMESPACE CASA - END
351 :
352 : #endif /* _CASA_LINE_FINDING_UTILS_H_ */
|