Coverage for /wheeldirectory/casa-6.7.0-12-py3.10.el8/lib/py/lib/python3.10/site-packages/casatasks/private/mslisthelper.py: 8%

234 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-31 19:53 +0000

1 

2import os 

3import math 

4import numpy as np 

5import fnmatch 

6 

7import subprocess 

8from collections import OrderedDict as odict 

9 

10###some helper tools 

11from casatasks import casalog 

12from casatools import table, msmetadata 

13from casatools import quanta as qatool 

14from casatools import ms as mstool 

15 

# Module-level CASA tool instances used by the helper functions of this module
# (ms: MeasurementSet tool, tb: table tool, msmd: MS metadata tool).
ms = mstool()
tb = table()
msmd = msmetadata()

19 

20 

def _read_main_description(msname):
    """Return the Main table description of msname, tagged with its path as '_name_'.

    Raises ValueError if the table keywords do not contain 'MS_VERSION'.
    """
    tb.open(msname)
    try:
        descr = tb.getdesc()
    finally:
        # release the shared table tool even if getdesc() fails
        tb.close()

    descr['_name_'] = msname

    if 'MS_VERSION' not in descr['_keywords_']:
        raise ValueError(msname+' is not a MeasurementSet.')

    return descr


def _strip_ignored_tables(keywords, ignore_tables):
    """Delete (in place) every keyword whose name matches one of the wildcard patterns."""
    # A set is used so that a keyword matched by several patterns is deleted only once.
    matched = {mytable
               for mytablepattern in ignore_tables
               for mytable in keywords
               if fnmatch.fnmatch(mytable, mytablepattern)}
    for mytable in matched:
        del keywords[mytable]


def _collect_subtables(keywords, pointing_logmsg):
    """Return (names, descriptions, pointing_populated) for the subtables referenced in keywords.

    A keyword refers to a subtable if its value is a string of the form 'Table: <path>'.
    Each returned description carries the subtable path as item '_name_'.
    pointing_populated is True only if a subtable named POINTING exists and
    cell 0 of its TIME column can be read.
    """
    names = []
    descs = []
    pointing_populated = False

    for value in keywords.values():
        if not isinstance(value, str):
            continue
        # partition() keeps everything after the first blank, so paths containing
        # blanks are not truncated
        prefix, _, subtbpath = value.partition(' ')
        if prefix != 'Table:':
            continue

        myname = subtbpath.split('/')[-1]
        tb.open(subtbpath)
        try:
            mydesc = tb.getdesc()
            if myname == 'POINTING':
                casalog.post(pointing_logmsg, 'INFO')
                try:
                    tb.getcell('TIME', 0)
                except Exception:
                    # cell 0 not readable: treat the table as unpopulated
                    pointing_populated = False
                else:
                    pointing_populated = True
        finally:
            tb.close()

        mydesc['_name_'] = subtbpath
        names.append(myname)
        descs.append(mydesc)

    return names, descs, pointing_populated


def check_mslist(vis, ignore_tables=['SORTED_TABLE'], testcontent=True):
    """
    Check the consistency of the setup of the MSs in the list "vis"
    Returns a dictionary describing the inconsistencies w.r.t. the first MS in the list:

        {'<vis 1>':
             {'<tablename1>': {'present_a': True/False,
                               'present_b': True/False,
                               'missingcol_a': [<column1>, <column2>,...],
                               'missingcol_b': [<column1>, <column2>,...]},
              '<tablename2>': {'present_a': True/False,
                               'present_b': True/False,
                               'missingcol_a': [<column1>, <column2>,...],
                               'missingcol_b': [<column1>, <column2>,...]}
             },
         '<vis 2>':
             ...
        }

    where <vis n> is the name of the nth MS in the input list of MSs. An entry for a given MS
    is only present if there are differences between that MS and the first MS in the list.

    If there are no differences in the setup of the MSs, the returned dictionary is empty.

    If there are differences for an MS, there is a dictionary item for each table which has a
    different setup and the value is a dictionary with two lists of the names of the columns
    which are missing in the table of MS A (the first one in the list) and table of MS B
    (the one compared to).
    "Missing" is to be understood as "present in the other table but not in this one".
    Furthermore, the dictionary contains the items "present_a" and "present_b" which are True
    if the given table is present at all in MS A and MS B respectively. If a table is present
    in only one of the two MSs, the items 'missingcol_a' and 'missingcol_b' are omitted.
    The Main table is reported under the name 'Main'.

    The optional parameter "ignore_tables" defines the list of subtables of Main which
    are to be ignored in the comparison. Default: ['SORTED_TABLE']
    Table names can be provided using wildcards like "*" and "?", e.g. 'ASDM_*'.
    A single string is accepted as well.

    If the optional parameter testcontent==True, then for a column which is absent in one table
    it is tested in the other table whether the column actually contains data,
    i.e. cell 0 can be read. If not, the absence of the column is ignored.

    Independently from the value of testcontent, all optional Main table columns are
    tested as to whether they are present and if so whether they contain data.
    A warning is logged if they don't contain data. (This test is not done if vis
    contains only one MS.)

    Raises ValueError if vis or ignore_tables is neither a list nor a string, if vis is empty,
    if an MS is not a MeasurementSet, or if the first MS occurs again later in the list.
    """

    rval = {}

    if isinstance(vis, str):
        vis = [vis]
    elif not isinstance(vis, list):
        raise ValueError('vis parameter needs to be a list of strings.')

    # NOTE: the default list is never modified, only read.
    if isinstance(ignore_tables, str):
        ignore_tables = [ignore_tables]
    elif not isinstance(ignore_tables, list):
        raise ValueError('ignore_tables parameter needs to be a list of strings.')

    if not vis:
        raise ValueError('vis parameter must not be an empty list.')

    if len(vis) == 1:
        # nothing to compare; only verify that the MS can be opened
        try:
            ms.open(vis[0])
            ms.close()
        except Exception as exc:
            raise ValueError(vis[0]+' does not exist or is not a MeasurementSet.') from exc

        return rval

    haspointing = [False] * len(vis)  # track the presence of populated pointing tables

    # Gather information from first MS in list

    descr_a = _read_main_description(vis[0])
    _strip_ignored_tables(descr_a['_keywords_'], ignore_tables)
    subtbnames_a, subtbdescs_a, haspointing[0] = _collect_subtables(
        descr_a['_keywords_'],
        'Checking for unpopulated POINTING table in first MS ...')

    casalog.post('Checking for unpopulated optional Main Table columns in first MS ...', 'INFO')
    opt_main_populated(descr_a)  # ... in first MS

    # Loop over other MSs and check against first

    for viscount, myvis in enumerate(vis[1:], start=1):
        # NOTE(review): only a repetition of the first MS is detected here;
        # duplicates among the later entries are not checked.
        if myvis == vis[0]:
            raise ValueError(myvis+' is contained in the list more than once.')

        descr_b = _read_main_description(myvis)
        _strip_ignored_tables(descr_b['_keywords_'], ignore_tables)
        subtbnames_b, subtbdescs_b, haspointing[viscount] = _collect_subtables(
            descr_b['_keywords_'],
            'Checking for unpopulated POINTING table ...')

        # Comparison
        compresult = {}

        # Main table
        cmpres = comptbdescr(descr_a, descr_b, testcontent=testcontent)
        if cmpres != {}:
            compresult['Main'] = cmpres

        casalog.post('Checking for unpopulated optional Main Table columns ...', 'INFO')
        opt_main_populated(descr_b)

        # Subtables
        # first description found for each table name of the second MS
        descs_b_by_name = {}
        for myname, mydesc in zip(subtbnames_b, subtbdescs_b):
            descs_b_by_name.setdefault(myname, mydesc)

        for myname, mydesc in zip(subtbnames_a, subtbdescs_a):  # loop over tables in first MS
            if myname not in descs_b_by_name:
                compresult[myname] = {'present_a': True, 'present_b': False}
            else:  # table is present in both MSs
                cmpres = comptbdescr(mydesc, descs_b_by_name[myname],
                                     testcontent=testcontent)
                if cmpres != {}:
                    compresult[myname] = cmpres

        for myname in subtbnames_b:  # loop over tables in second MS
            if myname not in subtbnames_a:
                compresult[myname] = {'present_a': False, 'present_b': True}
            # else clause not needed since already covered in previous loop

        if compresult != {}:
            rval[myvis] = compresult

    # evaluate haspointing: warn only if some, but not all, MSs have a populated POINTING table
    if any(haspointing) and not all(haspointing):
        casalog.post('Some but not all of the input MSs are lacking a populated POINTING table:', 'WARN')
        for i, populated in enumerate(haspointing):
            if not populated:
                casalog.post(' '+str(i)+': '+vis[i], 'WARN')
        casalog.post('The joint dataset will not have a valid POINTING table.', 'WARN')

    return rval

231 

232 

def _columns_absent_from_other(descr_src, descr_other, ignorecol, testcontent):
    """List the columns of descr_src which descr_other lacks (optionally only those holding data)."""
    absent = []
    for myentry in descr_src:
        # only inspect relevant columns: items starting with '_' are not columns
        if myentry.startswith('_') or myentry in ignorecol or myentry in descr_other:
            continue

        if testcontent:
            tb.open(descr_src['_name_'])
            try:
                tb.getcell(myentry, 0)
            except Exception:
                has_data = False
            else:
                has_data = True
            finally:
                tb.close()

            if not has_data:
                casalog.post('Column '+myentry+' in table '+descr_src['_name_']+' has no data.','INFO')
                continue  # i.e. ignore this column because it has no data

        absent.append(myentry)

    return absent


def comptbdescr(descr_a, descr_b, ignorecol=[], testcontent=True):
    """Utility function for check_mslist
    - compares two table descriptions descr_a and descr_b
    - the absence of the columns listed in ignorecol is ignored
    - if testcontent==True, then for a column which is absent in one table
      it is tested in the other table whether the column actually contains data,
      i.e. cell 0 can be read. If not, the absence of the column is ignored.
      For this to work, the table path has to be added to the table description
      as item "_name_".

    Returns an empty dictionary if no relevant column is missing on either side,
    otherwise
        {'present_a': True, 'present_b': True,
         'missingcol_a': [columns only found in descr_b],
         'missingcol_b': [columns only found in descr_a]}
    """
    # columns present in A but not in B are "missing in B" and vice versa;
    # table A is inspected first, as before
    mscol_b = _columns_absent_from_other(descr_a, descr_b, ignorecol, testcontent)
    mscol_a = _columns_absent_from_other(descr_b, descr_a, ignorecol, testcontent)

    if mscol_a or mscol_b:
        return {'present_a': True, 'present_b': True,
                'missingcol_a': mscol_a, 'missingcol_b': mscol_b}

    return {}

277 

def sort_mslist(vis, visweightscale=None):
    """
    Returns two or three items:
    1) list of MSs sorted by the earliest entry in the Main table TIME column.
    2) list of sorted MS start times
    3) if visweightscale!=None and contains a list of corresponding numbers,
       they are sorted as well and returned as third return value.
       If visweightscale==[], a list filled with values of 1 is returned.

    vis - list of MS names (a single string is treated as a one-element list)
    visweightscale - list of numbers (e.g. the weight scaling factors in concat);
                     a single number is treated as a one-element list
                     default: None (no value provided)

    Raises ValueError if vis is neither a list nor a string, if visweightscale
    cannot be interpreted as numerical value(s), or if the number of weights
    differs from the number of MSs.
    """
    if isinstance(vis, str):
        vis = [vis]
    elif not isinstance(vis, list):
        raise ValueError('Parameter vis should be a list of strings.')

    doweightscale = visweightscale is not None
    if doweightscale:
        if not isinstance(visweightscale, list):
            try:
                visweightscale = [float(visweightscale)]
            except (TypeError, ValueError, OverflowError) as exc:
                raise ValueError('Parameter visweightscale should be a list of numerical values or None.') from exc
        elif not visweightscale:
            visweightscale = list(np.ones(len(vis)))

        # also catches a single number given for several MSs before any table is opened
        if len(visweightscale) != len(vis):
            raise ValueError('Parameter visweightscale should have same length as vis.')
        weights = visweightscale
    else:
        weights = [0] * len(vis)  # placeholders, never returned

    # pair each MS with its own weight by position, so that repeated names keep their weights
    namestuples = []
    for name, weight in zip(vis, weights):
        tb.open(name)
        try:
            times = tb.getcol('TIME')
        finally:
            tb.close()
        times.sort()
        namestuples.append((times[0], name, weight))

    # sorted() is stable: MSs with identical start time keep their input order
    sorted_namestuples = sorted(namestuples, key=lambda entry: entry[0])

    sortedvis = [entry[1] for entry in sorted_namestuples]
    sortedtimes = [entry[0] for entry in sorted_namestuples]

    if doweightscale:
        sortedvisweightscale = [entry[2] for entry in sorted_namestuples]
        return sortedvis, sortedtimes, sortedvisweightscale

    return sortedvis, sortedtimes

339 

340 

def report_sort_result(sorted_vis, sorted_times, sorted_idx, mycasalog=None, priority='INFO'):
    """Log a summary table of the outcome of an MS sort.

    Nothing is logged if sorted_vis holds fewer than two entries.

    Args:
        sorted_vis (list): MS names in sorted order
        sorted_times (list): start times matching sorted_vis; each value is
            passed to the quanta tool as a quantity in seconds
        sorted_idx (list): for each sorted entry, its index in the original MS list
        mycasalog (logsink, optional): logger to post to. The module-level
            casalog is used if None. Defaults to None.
        priority (str, optional): priority of the log messages. Defaults to 'INFO'.
    """
    if len(sorted_vis) < 2:
        # a single (or no) MS: there is no ordering to report
        return

    logger = casalog if mycasalog is None else mycasalog
    quanta = qatool()

    column_titles = 'Order {:>24s} {:>20s} Original_Order'.format('MS_Name', 'Start_Time')
    preamble = (
        'Summary of the MS internal sort:',
        column_titles,
        '-' * len(column_titles),
    )
    for line in preamble:
        logger.post(line, priority=priority)

    row_template = '{:>3d} {:>26s} {:>20s} {:>3d}'
    rows = zip(sorted_idx, sorted_vis, sorted_times)
    for new_pos, (old_pos, msname, start) in enumerate(rows):
        # show only the last path component, ignoring trailing slashes
        short_name = os.path.basename(msname.rstrip('/'))
        start_text = quanta.time(quanta.quantity(start, 's'), form=['ymd', 'hms'])[0]
        logger.post(
            row_template.format(new_pos, short_name, start_text, old_pos),
            priority=priority
        )

374 

375 

def opt_main_populated(descr, ignorecol=[]):
    """Utility function for check_mslist
    Check the optional Main Table data columns and log warnings
    if they exist but don't contain data, i.e. cell 0 cannot be read.

    descr - table description of the main table; the table path has to be
            present in the description as item "_name_"

    Columns listed in ignorecol are not checked.

    Returns True if no warnings were logged.
    """

    rval = True

    opt_main_cols = ['DATA', 'FLOAT_DATA', 'LAG_DATA', 'SIGMA_SPECTRUM', 'WEIGHT_SPECTRUM']

    tbname = descr['_name_']

    for myentry in opt_main_cols:
        # only inspect relevant columns
        if myentry not in descr or myentry in ignorecol:
            continue

        tb.open(tbname)
        try:
            tb.getcell(myentry, 0)
        except Exception:
            populated = False
        else:
            populated = True
        finally:
            # always release the shared table tool
            tb.close()

        if not populated:
            rval = False
            casalog.post('Column '+myentry+' in table '+tbname+' has no data. Accessing it will cause errors.','WARN')

    return rval

407 

408 

409