Coverage for /wheeldirectory/casa-6.7.0-12-py3.10.el8/lib/py/lib/python3.10/site-packages/casatasks/private/mslisthelper.py: 82%
234 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-01 07:19 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-01 07:19 +0000
2import os
3import math
4import numpy as np
5import fnmatch
7import subprocess
8from collections import OrderedDict as odict
10###some helper tools
11from casatasks import casalog
12from casatools import table, msmetadata
13from casatools import quanta as qatool
14from casatools import ms as mstool
# Module-level CASA tool instances created once at import time.
# The helper functions in this module use `tb` for table access and
# `ms` to probe whether a single path is a MeasurementSet.
ms = mstool()
tb = table()
msmd = msmetadata()  # NOTE(review): not referenced by the functions visible in this section
def check_mslist(vis, ignore_tables=None, testcontent=True):
    """
    Check the consistency of the setup of the MSs in the list "vis".

    Returns a dictionary describing the inconsistencies w.r.t. the first MS in the list:

        {'<vis 1>': {'<tablename1>': {'present_a': True/False,
                                      'present_b': True/False,
                                      'missingcol_a': [<column1>, <column2>, ...],
                                      'missingcol_b': [<column1>, <column2>, ...]},
                     '<tablename2>': {'present_a': True/False,
                                      'present_b': True/False,
                                      'missingcol_a': [<column1>, <column2>, ...],
                                      'missingcol_b': [<column1>, <column2>, ...]}},
         '<vis 2>': ...
        }

    where <vis n> is the name of the nth MS in the input list of MSs. An entry for a given MS
    is only present if there are differences between that MS and the first MS in the list.
    The Main table is reported under the table name 'Main'.

    If there are no differences in the setup of the MSs (or "vis" is empty or holds a single
    valid MS), the returned dictionary is empty.

    If there are differences for an MS, there is a dictionary item for each table which has a
    different setup and the value is a dictionary with two lists of the names of the columns
    which are missing in the table of MS A (the first one in the list) and table of MS B (the
    one compared to). "Missing" is to be understood as "present in the other table but not in
    this one". Furthermore, the dictionary contains the items "present_a" and "present_b" which
    are True if the given table is present at all in MS A and MS B respectively. For a table
    which is absent from one of the two MSs only "present_a" and "present_b" are given.

    vis           - MS name or list of MS names
    ignore_tables - name pattern or list of name patterns of the subtables of Main which
                    are to be ignored in the comparison. Table names can be provided using
                    wildcards like "*" and "?", e.g. 'ASDM_*'.
                    default: None (equivalent to ['SORTED_TABLE'])
    testcontent   - if True, then for a column which is absent in one table it is tested in
                    the other table whether the column actually contains data, i.e. cell 0
                    can be read. If not, the absence of the column is ignored.

    Independently from the value of testcontent, all optional Main table columns are
    tested as to whether they are present and if so whether they contain data.
    A warning is posted to the log if they don't contain data.

    A warning is also posted if some but not all of the MSs have a POINTING table
    whose TIME cell 0 can be read.

    Raises ValueError if a parameter has the wrong type, if an MS is not a MeasurementSet,
    or if the first MS shows up again later in the list.
    """
    if ignore_tables is None:
        ignore_tables = ['SORTED_TABLE']

    rval = {}

    if not isinstance(vis, list):
        if isinstance(vis, str):
            vis = [vis]
        else:
            raise ValueError('vis parameter needs to be a list of strings.')

    if not isinstance(ignore_tables, list):
        if isinstance(ignore_tables, str):
            ignore_tables = [ignore_tables]
        else:
            raise ValueError('ignore_tables parameter needs to be a list of strings.')

    if not vis:
        # nothing to compare, hence no inconsistencies
        return rval

    if len(vis) == 1:
        # Only verify that the single MS can be opened; there is nothing to compare it to.
        try:
            ms.open(vis[0])
            ms.close()
        except Exception as exc:
            raise ValueError(vis[0]+' does not exist or is not a MeasurementSet.') from exc
        return rval

    haspointing = [False] * len(vis)  # track the presence of populated pointing tables

    # Gather information from first MS in list
    descr_a, subtbnames_a, subtbdescs_a, haspointing[0] = _read_ms_setup(
        vis[0], ignore_tables,
        'Checking for unpopulated POINTING table in first MS ...')

    casalog.post('Checking for unpopulated optional Main Table columns in first MS ...', 'INFO')
    opt_main_populated(descr_a)

    known_in_a = set(subtbnames_a)

    # Loop over other MSs and check against first
    for viscount, myvis in enumerate(vis[1:], start=1):
        # Note: only a repetition of the first MS is detected here.
        if myvis == vis[0]:
            raise ValueError(myvis+' is contained in the list more than once.')

        descr_b, subtbnames_b, subtbdescs_b, haspointing[viscount] = _read_ms_setup(
            myvis, ignore_tables,
            'Checking for unpopulated POINTING table ...')

        compresult = {}

        # Main table
        cmpres = comptbdescr(descr_a, descr_b, testcontent=testcontent)
        if cmpres != {}:
            compresult['Main'] = cmpres

        casalog.post('Checking for unpopulated optional Main Table columns ...', 'INFO')
        opt_main_populated(descr_b)

        # Subtables: index the descriptions of MS B by name (first occurrence wins)
        descs_b_by_name = {}
        for name_b, desc_b in zip(subtbnames_b, subtbdescs_b):
            descs_b_by_name.setdefault(name_b, desc_b)

        for name_a, desc_a in zip(subtbnames_a, subtbdescs_a):  # tables in first MS
            if name_a not in descs_b_by_name:
                compresult[name_a] = {'present_a': True, 'present_b': False}
            else:  # table is present in both MSs
                cmpres = comptbdescr(desc_a, descs_b_by_name[name_a], testcontent=testcontent)
                if cmpres != {}:
                    compresult[name_a] = cmpres

        for name_b in subtbnames_b:  # tables in second MS
            # tables present in both MSs were already covered in the previous loop
            if name_b not in known_in_a:
                compresult[name_b] = {'present_a': False, 'present_b': True}

        if compresult:
            rval[myvis] = compresult

    # evaluate the pointing table bookkeeping
    if any(haspointing) and not all(haspointing):
        casalog.post('Some but not all of the input MSs are lacking a populated POINTING table:', 'WARN')
        for i, populated in enumerate(haspointing):
            if not populated:
                casalog.post(' '+str(i)+': '+vis[i], 'WARN')
        casalog.post('The joint dataset will not have a valid POINTING table.', 'WARN')

    return rval


def _read_ms_setup(msname, ignore_tables, pointing_msg):
    """Utility function for check_mslist
    Read the table descriptions of the Main table and of the subtables of one MS.

    msname        - path of the MS
    ignore_tables - list of name patterns (wildcards allowed) of the Main table
                    keywords to be dropped before the subtables are inspected
    pointing_msg  - INFO message to post when a POINTING subtable is inspected

    Returns the tuple (descr, subtbnames, subtbdescs, pointing_populated) where
    descr is the Main table description (ignored keywords removed), subtbnames and
    subtbdescs are parallel lists of subtable names and descriptions, and
    pointing_populated is True if a POINTING subtable exists whose TIME cell 0
    can be read. The table path is stored in each description as item "_name_".

    Raises ValueError if the Main table has no MS_VERSION keyword.
    """
    tb.open(msname)
    try:
        descr = tb.getdesc()
    finally:
        tb.close()

    descr['_name_'] = msname

    keywords = descr['_keywords_']
    if 'MS_VERSION' not in keywords:
        raise ValueError(msname+' is not a MeasurementSet.')

    # Eliminate the tables to be ignored. Each keyword is collected at most once
    # even if it matches several patterns, so that it is not deleted twice.
    ignored = [kw for kw in keywords
               if any(fnmatch.fnmatch(kw, pattern) for pattern in ignore_tables)]
    for kw in ignored:
        del keywords[kw]

    # Extract subtable details
    subtbnames = []
    subtbdescs = []
    pointing_populated = False

    for kwvalue in keywords.values():
        if not isinstance(kwvalue, str):
            continue
        # Subtable keywords have the form "Table: <path>". Split only at the first
        # blank so that table paths containing blanks stay intact.
        prefix, _, subtbpath = kwvalue.partition(' ')
        if prefix != 'Table:':
            continue

        myname = subtbpath.split('/')[-1]

        tb.open(subtbpath)
        try:
            mydesc = tb.getdesc()
            if myname == 'POINTING':
                casalog.post(pointing_msg, 'INFO')
                try:
                    tb.getcell('TIME', 0)
                    pointing_populated = True
                except Exception:
                    # cell 0 cannot be read, i.e. the table is considered unpopulated
                    pointing_populated = False
        finally:
            tb.close()

        mydesc['_name_'] = subtbpath
        subtbnames.append(myname)
        subtbdescs.append(mydesc)

    return descr, subtbnames, subtbdescs, pointing_populated
def comptbdescr(descr_a, descr_b, ignorecol=None, testcontent=True):
    """Utility function for check_mslist
    Compare the two table descriptions descr_a and descr_b.

    descr_a, descr_b - table descriptions (dictionaries keyed by column name);
                       entries whose name starts with "_" are not treated as columns
    ignorecol        - names of columns whose absence is to be ignored
                       default: None (no column is ignored)
    testcontent      - if True, then for a column which is absent in one table
                       it is tested in the other table whether the column actually
                       contains data, i.e. cell 0 can be read. If not, the absence
                       of the column is ignored. For this to work, the table path
                       has to be added to the table description as item "_name_".

    Returns an empty dictionary if no differences were found, otherwise
      {'present_a': True, 'present_b': True,
       'missingcol_a': [<columns only present in B>],
       'missingcol_b': [<columns only present in A>]}
    """
    ignored = () if ignorecol is None else ignorecol

    # a column only found in A is "missing in B" and vice versa
    mscol_b = _columns_only_in(descr_a, descr_b, ignored, testcontent)
    mscol_a = _columns_only_in(descr_b, descr_a, ignored, testcontent)

    if mscol_a or mscol_b:
        return {'present_a': True, 'present_b': True,
                'missingcol_a': mscol_a, 'missingcol_b': mscol_b}

    return {}


def _columns_only_in(descr, other, ignorecol, testcontent):
    """Utility function for comptbdescr
    Return the list of columns which are in descr but not in other.
    If testcontent is True, columns of which cell 0 cannot be read are left out.
    """
    only_here = []
    for myentry in descr:
        # only inspect relevant columns
        if myentry.startswith('_') or myentry in ignorecol:
            continue
        if myentry in other:
            continue
        if testcontent:
            tb.open(descr['_name_'])
            try:
                tb.getcell(myentry, 0)
                hasdata = True
            except Exception:
                hasdata = False
            finally:
                tb.close()
            if not hasdata:
                casalog.post('Column '+myentry+' in table '+descr['_name_']+' has no data.', 'INFO')
                continue  # i.e. ignore this column because it has no data
        only_here.append(myentry)
    return only_here
def sort_mslist(vis, visweightscale=None):
    """
    Sort a list of MSs by the earliest entry in the Main table TIME column.

    Returns two or three items:
      1) list of MSs sorted by the earliest entry in the Main table TIME column.
      2) list of sorted MS start times
      3) if visweightscale!=None and contains a list of corresponding numbers,
         they are sorted as well and returned as third return value.
         If visweightscale==[], a list filled with values of 1 is returned.

    vis - MS name or list of MS names
    visweightscale - list of numbers (e.g. the weight scaling factors in concat),
                     one per MS; a single number is accepted for a single MS
                     default: None (no value provided)

    Raises ValueError if vis is neither a string nor a list, if visweightscale
    cannot be interpreted as number(s), or if its length differs from that of vis.
    """
    if not isinstance(vis, list):
        if isinstance(vis, str):
            vis = [vis]
        else:
            raise ValueError('Parameter vis should be a list of strings.')

    doweightscale = visweightscale is not None
    if doweightscale:
        if not isinstance(visweightscale, list):
            try:
                visweightscale = [float(visweightscale)]
            except (TypeError, ValueError, OverflowError) as exc:
                raise ValueError('Parameter visweightscale should be a list of numerical values or None.') from exc
        elif not visweightscale:
            visweightscale = list(np.ones(len(vis)))
        # Checked for every input form, so that a single number given for
        # several MSs is rejected here rather than failing on indexing later.
        if len(visweightscale) != len(vis):
            raise ValueError('Parameter visweightscale should have same length as vis.')

    namestuples = []
    for idx, name in enumerate(vis):
        tb.open(name)
        try:
            times = tb.getcol('TIME')
        finally:
            tb.close()
        times.sort()
        # The weight is picked by position, not by name lookup, so that an MS
        # name occurring more than once still gets its own weight.
        weight = visweightscale[idx] if doweightscale else 0
        namestuples.append((times[0], name, weight))

    # sorted() is stable: MSs with identical start time keep their input order
    sorted_namestuples = sorted(namestuples, key=lambda entry: entry[0])

    sortedvis = [entry[1] for entry in sorted_namestuples]
    sortedtimes = [entry[0] for entry in sorted_namestuples]

    if doweightscale:
        sortedvisweightscale = [entry[2] for entry in sorted_namestuples]
        return sortedvis, sortedtimes, sortedvisweightscale

    return sortedvis, sortedtimes
def report_sort_result(sorted_vis, sorted_times, sorted_idx, mycasalog=None, priority='INFO'):
    """Post a summary table of the MS sort to the log.

    One row is written per MS, giving its position after the sort, the base
    name of the MS, its start time and its position in the original list.
    Nothing is posted if fewer than two MSs are given.

    Args:
        sorted_vis (list): MS names in sorted order
        sorted_times (list): observation start times in seconds, in sorted order
        sorted_idx (list): for each sorted MS, its index in the original MS list
        mycasalog (logsink, optional): logsink instance to post to.
            Defaults to None, in which case the global casalog is used.
        priority (str, optional): priority of the log messages. Defaults to 'INFO'.
    """
    # A list of zero or one MS has no order worth reporting.
    if not len(sorted_vis) > 1:
        return

    logger = casalog if mycasalog is None else mycasalog
    qa = qatool()

    header = 'Order {:>24s} {:>20s} Original_Order'.format('MS_Name', 'Start_Time')
    row_template = '{:>3d} {:>26s} {:>20s} {:>3d}'

    for line in ('Summary of the MS internal sort:', header, '-' * len(header)):
        logger.post(line, priority=priority)

    rows = zip(sorted_idx, sorted_vis, sorted_times)
    for new_pos, (old_pos, msname, start) in enumerate(rows):
        # strip a trailing slash first, otherwise basename would be empty
        short_name = os.path.basename(msname.rstrip('/'))
        start_str = qa.time(qa.quantity(start, 's'), form=['ymd', 'hms'])[0]
        logger.post(
            row_template.format(new_pos, short_name, start_str, old_pos),
            priority=priority
        )
def opt_main_populated(descr, ignorecol=None):
    """Utility function for check_mslist
    Check the optional Main Table data columns and post warnings
    if they exist but don't contain data.

    descr     - table description of the main table; the table path has to be
                contained in the description as item "_name_"
    ignorecol - names of columns which are not to be checked
                default: None (all optional columns are checked)

    A column is regarded as containing data if its cell 0 can be read.

    Returns True if no warnings were posted.
    """
    ignored = () if ignorecol is None else ignorecol

    rval = True

    opt_main_cols = ['DATA', 'FLOAT_DATA', 'LAG_DATA', 'SIGMA_SPECTRUM', 'WEIGHT_SPECTRUM']

    tbname = descr['_name_']

    for myentry in opt_main_cols:
        # only inspect relevant columns
        if myentry not in descr or myentry in ignored:
            continue

        tb.open(tbname)
        try:
            tb.getcell(myentry, 0)
            hasdata = True
        except Exception:
            hasdata = False
        finally:
            tb.close()

        if not hasdata:
            rval = False
            casalog.post('Column '+myentry+' in table '+tbname+' has no data. Accessing it will cause errors.', 'WARN')

    return rval