Line data Source code
1 : /*************************************************************************
2 : ALGLIB 3.17.0 (source code generated 2020-12-27)
3 : Copyright (c) Sergey Bochkanov (ALGLIB project).
4 :
5 : >>> SOURCE LICENSE >>>
6 : This program is free software; you can redistribute it and/or modify
7 : it under the terms of the GNU General Public License as published by
8 : the Free Software Foundation (www.fsf.org); either version 2 of the
9 : License, or (at your option) any later version.
10 :
11 : This program is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : GNU General Public License for more details.
15 :
16 : A copy of the GNU General Public License is available at
17 : http://www.fsf.org/licensing/licenses
18 : >>> END OF LICENSE >>>
19 : *************************************************************************/
20 : #ifdef _MSC_VER
21 : #define _CRT_SECURE_NO_WARNINGS
22 : #endif
23 :
24 : //
25 : // if AE_OS==AE_LINUX (will be redefined to AE_POSIX in ap.h),
26 : // set _GNU_SOURCE flag BEFORE any #includes to get affinity
27 : // management functions
28 : //
29 : #if (AE_OS==AE_LINUX) && !defined(_GNU_SOURCE)
30 : #define _GNU_SOURCE
31 : #endif
32 :
33 : #include "stdafx.h"
34 : #include "ap.h"
35 : #include <limits>
36 : #include <locale.h>
37 : #include <ctype.h>
38 :
39 : #if defined(AE_CPU)
40 : #if (AE_CPU==AE_INTEL)
41 :
42 : #if AE_COMPILER==AE_MSVC
43 : #include <intrin.h>
44 : #endif
45 :
46 : #endif
47 : #endif
48 :
49 : // disable some irrelevant warnings
50 : #if (AE_COMPILER==AE_MSVC) && !defined(AE_ALL_WARNINGS)
51 : #pragma warning(disable:4100)
52 : #pragma warning(disable:4127)
53 : #pragma warning(disable:4611)
54 : #pragma warning(disable:4702)
55 : #pragma warning(disable:4996)
56 : #endif
57 :
58 : /////////////////////////////////////////////////////////////////////////
59 : //
60 : // THIS SECTION IMPLEMENTS BASIC FUNCTIONALITY LIKE
61 : // MEMORY MANAGEMENT FOR VECTORS/MATRICES WHICH IS
62 : // SHARED BETWEEN C++ AND PURE C LIBRARIES
63 : //
64 : /////////////////////////////////////////////////////////////////////////
65 : namespace alglib_impl
66 : {
67 : /*
68 : * OS-specific includes
69 : */
70 : #ifdef AE_USE_CPP
71 : }
72 : #endif
73 : #if AE_OS==AE_WINDOWS || defined(AE_DEBUG4WINDOWS)
74 : #ifndef _WIN32_WINNT
75 : #define _WIN32_WINNT 0x0501
76 : #endif
77 : #include <windows.h>
78 : #include <process.h>
79 : #elif AE_OS==AE_POSIX || defined(AE_DEBUG4POSIX)
80 : #include <time.h>
81 : #include <unistd.h>
82 : #include <pthread.h>
83 : #include <sched.h>
84 : #include <sys/time.h>
85 : #endif
86 : /* Debugging helpers for Windows */
87 : #ifdef AE_DEBUG4WINDOWS
88 : #include <windows.h>
89 : #include <stdio.h>
90 : #endif
91 : #ifdef AE_USE_CPP
92 : namespace alglib_impl
93 : {
94 : #endif
95 :
96 : /*
97 : * local definitions
98 : */
99 : #define x_nb 16
100 : #define AE_DATA_ALIGN 64
101 : #define AE_PTR_ALIGN sizeof(void*)
102 : #define DYN_BOTTOM ((void*)1)
103 : #define DYN_FRAME ((void*)2)
104 : #define AE_LITTLE_ENDIAN 1
105 : #define AE_BIG_ENDIAN 2
106 : #define AE_MIXED_ENDIAN 3
107 : #define AE_SER_ENTRY_LENGTH 11
108 : #define AE_SER_ENTRIES_PER_ROW 5
109 :
110 : #define AE_SM_DEFAULT 0
111 : #define AE_SM_ALLOC 1
112 : #define AE_SM_READY2S 2
113 : #define AE_SM_TO_STRING 10
114 : #define AE_SM_TO_CPPSTRING 11
115 : #define AE_SM_TO_STREAM 12
116 : #define AE_SM_FROM_STRING 20
117 : #define AE_SM_FROM_STREAM 22
118 :
119 : #define AE_LOCK_CYCLES 512
120 : #define AE_LOCK_TESTS_BEFORE_YIELD 16
121 : #define AE_CRITICAL_ASSERT(x) if( !(x) ) abort()
122 :
123 : /* IDs for set_dbg_value */
124 : #define _ALGLIB_USE_ALLOC_COUNTER 0
125 : #define _ALGLIB_USE_DBG_COUNTERS 1
126 : #define _ALGLIB_USE_VENDOR_KERNELS 100
127 : #define _ALGLIB_VENDOR_MEMSTAT 101
128 :
129 : #define _ALGLIB_DEBUG_WORKSTEALING 200
130 : #define _ALGLIB_WSDBG_NCORES 201
131 : #define _ALGLIB_WSDBG_PUSHROOT_OK 202
132 : #define _ALGLIB_WSDBG_PUSHROOT_FAILED 203
133 :
134 : #define _ALGLIB_SET_GLOBAL_THREADING 1001
135 : #define _ALGLIB_SET_NWORKERS 1002
136 :
137 : /* IDs for get_dbg_value */
138 : #define _ALGLIB_GET_ALLOC_COUNTER 0
139 : #define _ALGLIB_GET_CUMULATIVE_ALLOC_SIZE 1
140 : #define _ALGLIB_GET_CUMULATIVE_ALLOC_COUNT 2
141 :
142 : #define _ALGLIB_GET_CORES_COUNT 1000
143 : #define _ALGLIB_GET_GLOBAL_THREADING 1001
144 : #define _ALGLIB_GET_NWORKERS 1002
145 :
/*************************************************************************
Lock.

This is internal structure which implements lock functionality. Its layout
is selected at compile time by AE_OS:
* AE_WINDOWS - pointer to the lock word (p_lock) plus a raw byte buffer of
               sizeof(ae_int_t)+AE_LOCK_ALIGNMENT bytes
               NOTE(review): presumably p_lock points to an aligned
               position inside buf - confirm against the lock
               initialization code, which is not visible here
* AE_POSIX   - pthread mutex
* otherwise  - plain boolean "is locked" flag
*************************************************************************/
typedef struct
{
#if AE_OS==AE_WINDOWS
    volatile ae_int_t * volatile p_lock;
    char buf[sizeof(ae_int_t)+AE_LOCK_ALIGNMENT];
#elif AE_OS==AE_POSIX
    pthread_mutex_t mutex;
#else
    ae_bool is_locked;
#endif
} _lock;
162 :
163 :
164 :
165 :
166 : /*
167 : * Error tracking facilities; these fields are modified every time ae_set_error_flag()
168 : * is called with non-zero cond. Access is not thread-safe, but it does not matter in practice.
169 : */
170 : static const char * sef_file = "";
171 : static int sef_line = 0;
172 : static const char * sef_xdesc = "";
173 :
174 : /*
175 : * Global flags, split into several char-sized variables in order
176 : * to avoid problem with non-atomic reads/writes (single-byte ops
177 : * are atomic on all modern architectures);
178 : *
179 : * Following variables are included:
180 : * * threading-related settings
181 : */
182 : unsigned char _alglib_global_threading_flags = _ALGLIB_FLG_THREADING_SERIAL>>_ALGLIB_FLG_THREADING_SHIFT;
183 :
184 : /*
185 : * DESCRIPTION: recommended number of active workers:
186 : * * positive value >=1 is used to specify exact number of active workers
187 : * * 0 means that ALL available cores are used
188 : * * negative value means that all cores EXCEPT for cores_to_use will be used
189 : * (say, -1 means that all cores except for one will be used). At least one
190 : * core will be used in this case, even if you assign -9999999 to this field.
191 : *
192 : * Default value = 0 (fully parallel execution) when AE_NWORKERS is not defined
193 : * = 0 for manually defined number of cores (AE_NWORKERS is defined)
194 : * PROTECTION: not needed; runtime modification is possible, but we do not need exact
195 : * synchronization.
196 : */
197 : #if defined(AE_NWORKERS) && (AE_NWORKERS<=0)
198 : #error AE_NWORKERS must be positive number or not defined at all.
199 : #endif
200 : #if defined(AE_NWORKERS)
201 : ae_int_t _alglib_cores_to_use = 0;
202 : #else
203 : ae_int_t _alglib_cores_to_use = 0;
204 : #endif
205 :
206 : /*
207 : * Debug counters
208 : */
209 : ae_int_t _alloc_counter = 0;
210 : ae_int_t _alloc_counter_total = 0;
211 : ae_bool _use_alloc_counter = ae_false;
212 :
213 : ae_int_t _dbg_alloc_total = 0;
214 : ae_bool _use_dbg_counters = ae_false;
215 :
216 : ae_bool _use_vendor_kernels = ae_true;
217 :
218 : ae_bool debug_workstealing = ae_false; /* debug workstealing environment? False by default */
219 : ae_int_t dbgws_pushroot_ok = 0;
220 : ae_int_t dbgws_pushroot_failed = 0;
221 :
222 : #ifdef AE_SMP_DEBUGCOUNTERS
223 : __declspec(align(AE_LOCK_ALIGNMENT)) volatile ae_int64_t _ae_dbg_lock_acquisitions = 0;
224 : __declspec(align(AE_LOCK_ALIGNMENT)) volatile ae_int64_t _ae_dbg_lock_spinwaits = 0;
225 : __declspec(align(AE_LOCK_ALIGNMENT)) volatile ae_int64_t _ae_dbg_lock_yields = 0;
226 : #endif
227 :
228 : /*
229 : * Allocation debugging
230 : */
231 : ae_bool _force_malloc_failure = ae_false;
232 : ae_int_t _malloc_failure_after = 0;
233 :
234 :
235 : /*
236 : * Trace-related declarations:
237 : * alglib_trace_type - trace output type
238 : * alglib_trace_file - file descriptor (to be used by ALGLIB code which
239 : * sends messages to trace log
240 : * alglib_fclose_trace - whether we have to call fclose() when disabling or
241 : * changing trace output
242 : * alglib_trace_tags - string buffer used to store tags + two additional
243 : * characters (leading and trailing commas) + null
244 : * terminator
245 : */
246 : #define ALGLIB_TRACE_NONE 0
247 : #define ALGLIB_TRACE_FILE 1
248 : #define ALGLIB_TRACE_TAGS_LEN 2048
249 : #define ALGLIB_TRACE_BUFFER_LEN (ALGLIB_TRACE_TAGS_LEN+2+1)
250 : static ae_int_t alglib_trace_type = ALGLIB_TRACE_NONE;
251 : FILE *alglib_trace_file = NULL;
252 : static ae_bool alglib_fclose_trace = ae_false;
253 : static char alglib_trace_tags[ALGLIB_TRACE_BUFFER_LEN];
254 :
255 : /*
256 : * Fields for memory allocation over static array
257 : */
258 : #if AE_MALLOC==AE_BASIC_STATIC_MALLOC
259 : #if AE_THREADING!=AE_SERIAL_UNSAFE
260 : #error Basis static malloc is thread-unsafe; define AE_THREADING=AE_SERIAL_UNSAFE to prove that you know it
261 : #endif
262 : static ae_int_t sm_page_size = 0;
263 : static ae_int_t sm_page_cnt = 0;
264 : static ae_int_t *sm_page_tbl = NULL;
265 : static unsigned char *sm_mem = NULL;
266 : #endif
267 :
268 : /*
269 : * These declarations are used to ensure that
270 : * sizeof(ae_bool)=1, sizeof(ae_int32_t)==4, sizeof(ae_int64_t)==8, sizeof(ae_int_t)==sizeof(void*).
271 : * they will lead to syntax error otherwise (array size will be negative).
272 : *
273 : * you can remove them, if you want - they are not used anywhere.
274 : *
275 : */
276 : static char _ae_bool_must_be_8_bits_wide [1-2*((int)(sizeof(ae_bool))-1)*((int)(sizeof(ae_bool))-1)];
277 : static char _ae_int32_t_must_be_32_bits_wide[1-2*((int)(sizeof(ae_int32_t))-4)*((int)(sizeof(ae_int32_t))-4)];
278 : static char _ae_int64_t_must_be_64_bits_wide[1-2*((int)(sizeof(ae_int64_t))-8)*((int)(sizeof(ae_int64_t))-8)];
279 : static char _ae_uint64_t_must_be_64_bits_wide[1-2*((int)(sizeof(ae_uint64_t))-8)*((int)(sizeof(ae_uint64_t))-8)];
280 : static char _ae_int_t_must_be_pointer_sized [1-2*((int)(sizeof(ae_int_t))-(int)sizeof(void*))*((int)(sizeof(ae_int_t))-(int)(sizeof(void*)))];
281 :
282 : /*
283 : * This variable is used to prevent some tricky optimizations which may degrade multithreaded performance.
284 : * It is touched once in the ae_init_pool() function from smp.c in order to prevent optimizations.
285 : *
286 : */
287 : static volatile ae_int_t ae_never_change_it = 1;
288 :
289 : /*************************************************************************
290 : This function should never be called. It is here to prevent spurious
291 : compiler warnings about unused variables (in fact: used).
292 : *************************************************************************/
293 0 : void ae_never_call_it()
294 : {
295 0 : ae_touch_ptr((void*)_ae_bool_must_be_8_bits_wide);
296 0 : ae_touch_ptr((void*)_ae_int32_t_must_be_32_bits_wide);
297 0 : ae_touch_ptr((void*)_ae_int64_t_must_be_64_bits_wide);
298 0 : ae_touch_ptr((void*)_ae_uint64_t_must_be_64_bits_wide);
299 0 : ae_touch_ptr((void*)_ae_int_t_must_be_pointer_sized);
300 0 : }
301 :
302 0 : void ae_set_dbg_flag(ae_int64_t flag_id, ae_int64_t flag_val)
303 : {
304 0 : if( flag_id==_ALGLIB_USE_ALLOC_COUNTER )
305 : {
306 0 : _use_alloc_counter = flag_val!=0;
307 0 : return;
308 : }
309 0 : if( flag_id==_ALGLIB_USE_DBG_COUNTERS )
310 : {
311 0 : _use_dbg_counters = flag_val!=0;
312 0 : return;
313 : }
314 0 : if( flag_id==_ALGLIB_USE_VENDOR_KERNELS )
315 : {
316 0 : _use_vendor_kernels = flag_val!=0;
317 0 : return;
318 : }
319 0 : if( flag_id==_ALGLIB_DEBUG_WORKSTEALING )
320 : {
321 0 : debug_workstealing = flag_val!=0;
322 0 : return;
323 : }
324 0 : if( flag_id==_ALGLIB_SET_GLOBAL_THREADING )
325 : {
326 0 : ae_set_global_threading((ae_uint64_t)flag_val);
327 0 : return;
328 : }
329 0 : if( flag_id==_ALGLIB_SET_NWORKERS )
330 : {
331 0 : _alglib_cores_to_use = (ae_int_t)flag_val;
332 0 : return;
333 : }
334 : }
335 :
336 0 : ae_int64_t ae_get_dbg_value(ae_int64_t id)
337 : {
338 0 : if( id==_ALGLIB_GET_ALLOC_COUNTER )
339 0 : return _alloc_counter;
340 0 : if( id==_ALGLIB_GET_CUMULATIVE_ALLOC_SIZE )
341 0 : return _dbg_alloc_total;
342 0 : if( id==_ALGLIB_GET_CUMULATIVE_ALLOC_COUNT )
343 0 : return _alloc_counter_total;
344 :
345 0 : if( id==_ALGLIB_VENDOR_MEMSTAT )
346 : {
347 : #if defined(AE_MKL)
348 : return ae_mkl_memstat();
349 : #else
350 0 : return 0;
351 : #endif
352 : }
353 :
354 : /* workstealing counters */
355 0 : if( id==_ALGLIB_WSDBG_NCORES )
356 : #if defined(AE_SMP)
357 : return ae_cores_count();
358 : #else
359 0 : return 0;
360 : #endif
361 0 : if( id==_ALGLIB_WSDBG_PUSHROOT_OK )
362 0 : return dbgws_pushroot_ok;
363 0 : if( id==_ALGLIB_WSDBG_PUSHROOT_FAILED )
364 0 : return dbgws_pushroot_failed;
365 :
366 0 : if( id==_ALGLIB_GET_CORES_COUNT )
367 : #if defined(AE_SMP)
368 : return ae_cores_count();
369 : #else
370 0 : return 0;
371 : #endif
372 0 : if( id==_ALGLIB_GET_GLOBAL_THREADING )
373 0 : return (ae_int64_t)ae_get_global_threading();
374 0 : if( id==_ALGLIB_GET_NWORKERS )
375 0 : return (ae_int64_t)_alglib_cores_to_use;
376 :
377 : /* unknown value */
378 0 : return 0;
379 : }
380 :
381 : /************************************************************************
382 : This function sets default (global) threading model:
383 : * serial execution
384 : * multithreading, if cores_to_use allows it
385 :
386 : ************************************************************************/
387 0 : void ae_set_global_threading(ae_uint64_t flg_value)
388 : {
389 0 : flg_value = flg_value&_ALGLIB_FLG_THREADING_MASK;
390 0 : AE_CRITICAL_ASSERT(flg_value==_ALGLIB_FLG_THREADING_SERIAL || flg_value==_ALGLIB_FLG_THREADING_PARALLEL);
391 0 : _alglib_global_threading_flags = (unsigned char)(flg_value>>_ALGLIB_FLG_THREADING_SHIFT);
392 0 : }
393 :
394 : /************************************************************************
395 : This function gets default (global) threading model:
396 : * serial execution
397 : * multithreading, if cores_to_use allows it
398 :
399 : ************************************************************************/
400 0 : ae_uint64_t ae_get_global_threading()
401 : {
402 0 : return ((ae_uint64_t)_alglib_global_threading_flags)<<_ALGLIB_FLG_THREADING_SHIFT;
403 : }
404 :
405 0 : void ae_set_error_flag(ae_bool *p_flag, ae_bool cond, const char *filename, int lineno, const char *xdesc)
406 : {
407 0 : if( cond )
408 : {
409 0 : *p_flag = ae_true;
410 0 : sef_file = filename;
411 0 : sef_line = lineno;
412 0 : sef_xdesc= xdesc;
413 : #ifdef ALGLIB_ABORT_ON_ERROR_FLAG
414 : printf("[ALGLIB] aborting on ae_set_error_flag(cond=true)\n");
415 : printf("[ALGLIB] %s:%d\n", filename, lineno);
416 : printf("[ALGLIB] %s\n", xdesc);
417 : fflush(stdout);
418 : if( alglib_trace_file!=NULL ) fflush(alglib_trace_file);
419 : abort();
420 : #endif
421 : }
422 0 : }
423 :
424 : /************************************************************************
425 : This function returns file name for the last call of ae_set_error_flag()
426 : with non-zero cond parameter.
427 : ************************************************************************/
428 0 : const char * ae_get_last_error_file()
429 : {
430 0 : return sef_file;
431 : }
432 :
433 : /************************************************************************
434 : This function returns line number for the last call of ae_set_error_flag()
435 : with non-zero cond parameter.
436 : ************************************************************************/
437 0 : int ae_get_last_error_line()
438 : {
439 0 : return sef_line;
440 : }
441 :
442 : /************************************************************************
443 : This function returns extra description for the last call of ae_set_error_flag()
444 : with non-zero cond parameter.
445 : ************************************************************************/
446 0 : const char * ae_get_last_error_xdesc()
447 : {
448 0 : return sef_xdesc;
449 : }
450 :
451 0 : ae_int_t ae_misalignment(const void *ptr, size_t alignment)
452 : {
453 : union _u
454 : {
455 : const void *ptr;
456 : ae_int_t iptr;
457 : } u;
458 0 : u.ptr = ptr;
459 0 : return (ae_int_t)(u.iptr%alignment);
460 : }
461 :
/************************************************************************
Rounds pointer up to the nearest address which is a multiple of alignment.

ptr         pointer to align
alignment   required alignment, in bytes; values 0 and 1 mean "no
            alignment" and return ptr unchanged

Returns ptr itself when it is already aligned, otherwise the first aligned
address after it. The caller is responsible for having enough room after
ptr (up to alignment-1 extra bytes).

The address is obtained by converting the pointer to an integer instead of
subtracting a null pointer from it, because the latter is undefined
behaviour.
************************************************************************/
void* ae_align(void *ptr, size_t alignment)
{
    size_t offs;

    /* nothing to do; this also protects the division below from alignment==0 */
    if( alignment<=1 )
        return ptr;

    offs = ((size_t)ptr)%alignment;
    if( offs==0 )
        return ptr;
    return (void*)((char*)ptr+(alignment-offs));
}
469 :
470 : /************************************************************************
471 : This function maps nworkers number (which can be positive, zero or
472 : negative with 0 meaning "all cores", -1 meaning "all cores -1" and so on)
473 : to "effective", strictly positive workers count.
474 :
475 : This function is intended to be used by debugging/testing code which
476 : tests different number of worker threads. It is NOT aligned in any way
477 : with ALGLIB multithreading framework (i.e. it can return non-zero worker
478 : count even for single-threaded GPLed ALGLIB).
479 : ************************************************************************/
480 0 : ae_int_t ae_get_effective_workers(ae_int_t nworkers)
481 : {
482 : ae_int_t ncores;
483 :
484 : /* determine cores count */
485 : #if defined(AE_NWORKERS)
486 : ncores = AE_NWORKERS;
487 : #elif AE_OS==AE_WINDOWS
488 : SYSTEM_INFO sysInfo;
489 : GetSystemInfo(&sysInfo);
490 : ncores = (ae_int_t)(sysInfo.dwNumberOfProcessors);
491 : #elif AE_OS==AE_POSIX
492 : {
493 : long r = sysconf(_SC_NPROCESSORS_ONLN);
494 : ncores = r<=0 ? 1 : r;
495 : }
496 : #else
497 0 : ncores = 1;
498 : #endif
499 0 : AE_CRITICAL_ASSERT(ncores>=1);
500 :
501 : /* map nworkers to its effective value */
502 0 : if( nworkers>=1 )
503 0 : return nworkers>ncores ? ncores : nworkers;
504 0 : return ncores+nworkers>=1 ? ncores+nworkers : 1;
505 : }
506 :
507 : /*************************************************************************
508 : This function belongs to the family of "optional atomics", i.e. atomic
509 : functions which either perform atomic changes - or do nothing at all, if
510 : current compiler settings do not allow us to generate atomic code.
511 :
512 : All "optional atomics" are synchronized, i.e. either all of them work - or
513 : no one of the works.
514 :
515 : This particular function performs atomic addition on pointer-sized value,
516 : which must be pointer-size aligned.
517 :
518 : NOTE: this function is not intended to be extremely high performance one,
519 : so use it only when necessary.
520 : *************************************************************************/
521 0 : void ae_optional_atomic_add_i(ae_int_t *p, ae_int_t v)
522 : {
523 0 : AE_CRITICAL_ASSERT(ae_misalignment(p,sizeof(void*))==0);
524 : #if AE_OS==AE_WINDOWS
525 : for(;;)
526 : {
527 : /* perform conversion between ae_int_t* and void**
528 : without compiler warnings about indirection levels */
529 : union _u
530 : {
531 : PVOID volatile * volatile ptr;
532 : volatile ae_int_t * volatile iptr;
533 : } u;
534 : u.iptr = p;
535 :
536 : /* atomic read for initial value */
537 : PVOID v0 = InterlockedCompareExchangePointer(u.ptr, NULL, NULL);
538 :
539 : /* increment cached value and store */
540 : if( InterlockedCompareExchangePointer(u.ptr, (PVOID)(((char*)v0)+v), v0)==v0 )
541 : break;
542 : }
543 : #elif (AE_COMPILER==AE_GNUC) && (AE_CPU==AE_INTEL) && (__GNUC__*100+__GNUC__>=470)
544 : __atomic_add_fetch(p, v, __ATOMIC_RELAXED);
545 : #else
546 : #endif
547 0 : }
548 :
549 : /*************************************************************************
550 : This function belongs to the family of "optional atomics", i.e. atomic
551 : functions which either perform atomic changes - or do nothing at all, if
552 : current compiler settings do not allow us to generate atomic code.
553 :
554 : All "optional atomics" are synchronized, i.e. either all of them work - or
555 : no one of the works.
556 :
557 : This particular function performs atomic subtraction on pointer-sized
558 : value, which must be pointer-size aligned.
559 :
560 : NOTE: this function is not intended to be extremely high performance one,
561 : so use it only when necessary.
562 : *************************************************************************/
563 0 : void ae_optional_atomic_sub_i(ae_int_t *p, ae_int_t v)
564 : {
565 0 : AE_CRITICAL_ASSERT(ae_misalignment(p,sizeof(void*))==0);
566 : #if AE_OS==AE_WINDOWS
567 : for(;;)
568 : {
569 : /* perform conversion between ae_int_t* and void**
570 : without compiler warnings about indirection levels */
571 : union _u
572 : {
573 : PVOID volatile * volatile ptr;
574 : volatile ae_int_t * volatile iptr;
575 : } u;
576 : u.iptr = p;
577 :
578 : /* atomic read for initial value, convert it to 1-byte pointer */
579 : PVOID v0 = InterlockedCompareExchangePointer(u.ptr, NULL, NULL);
580 :
581 : /* increment cached value and store */
582 : if( InterlockedCompareExchangePointer(u.ptr, (PVOID)(((char*)v0)-v), v0)==v0 )
583 : break;
584 : }
585 : #elif (AE_COMPILER==AE_GNUC) && (AE_CPU==AE_INTEL) && (__GNUC__*100+__GNUC__>=470)
586 : __atomic_sub_fetch(p, v, __ATOMIC_RELAXED);
587 : #else
588 : #endif
589 0 : }
590 :
591 :
592 : /*************************************************************************
593 : This function cleans up automatically managed memory before caller terminates
594 : ALGLIB executing by ae_break() or by simply stopping calling callback.
595 :
596 : For state!=NULL it calls thread_exception_handler() and the ae_state_clear().
597 : For state==NULL it does nothing.
598 : *************************************************************************/
599 0 : void ae_clean_up_before_breaking(ae_state *state)
600 : {
601 0 : if( state!=NULL )
602 : {
603 0 : if( state->thread_exception_handler!=NULL )
604 0 : state->thread_exception_handler(state);
605 0 : ae_state_clear(state);
606 : }
607 0 : }
608 :
609 : /*************************************************************************
610 : This function abnormally aborts program, using one of several ways:
611 :
612 : * for state!=NULL and state->break_jump being initialized with call to
613 : ae_state_set_break_jump() - it performs longjmp() to return site.
614 : * otherwise, abort() is called
615 :
616 : In all cases, for state!=NULL function sets state->last_error and
617 : state->error_msg fields. It also clears state with ae_state_clear().
618 :
619 : If state is not NULL and state->thread_exception_handler is set, it is
620 : called prior to handling error and clearing state.
621 : *************************************************************************/
622 0 : void ae_break(ae_state *state, ae_error_type error_type, const char *msg)
623 : {
624 0 : if( state!=NULL )
625 : {
626 0 : if( alglib_trace_type!=ALGLIB_TRACE_NONE )
627 0 : ae_trace("---!!! CRITICAL ERROR !!!--- exception with message '%s' was generated\n", msg!=NULL ? msg : "");
628 0 : ae_clean_up_before_breaking(state);
629 0 : state->last_error = error_type;
630 0 : state->error_msg = msg;
631 0 : if( state->break_jump!=NULL )
632 0 : longjmp(*(state->break_jump), 1);
633 : else
634 0 : abort();
635 : }
636 : else
637 0 : abort();
638 : }
639 :
640 : #if AE_MALLOC==AE_BASIC_STATIC_MALLOC
641 : void set_memory_pool(void *ptr, size_t size)
642 : {
643 : /*
644 : * Integrity checks
645 : */
646 : AE_CRITICAL_ASSERT(sm_page_size==0);
647 : AE_CRITICAL_ASSERT(sm_page_cnt==0);
648 : AE_CRITICAL_ASSERT(sm_page_tbl==NULL);
649 : AE_CRITICAL_ASSERT(sm_mem==NULL);
650 : AE_CRITICAL_ASSERT(size>0);
651 :
652 : /*
653 : * Align pointer
654 : */
655 : size -= ae_misalignment(ptr, sizeof(ae_int_t));
656 : ptr = ae_align(ptr, sizeof(ae_int_t));
657 :
658 : /*
659 : * Calculate page size and page count, prepare pointers to page table and memory
660 : */
661 : sm_page_size = 256;
662 : AE_CRITICAL_ASSERT(size>=(sm_page_size+sizeof(ae_int_t))+sm_page_size); /* we expect to have memory for at least one page + table entry + alignment */
663 : sm_page_cnt = (size-sm_page_size)/(sm_page_size+sizeof(ae_int_t));
664 : AE_CRITICAL_ASSERT(sm_page_cnt>0);
665 : sm_page_tbl = (ae_int_t*)ptr;
666 : sm_mem = (unsigned char*)ae_align(sm_page_tbl+sm_page_cnt, sm_page_size);
667 :
668 : /*
669 : * Mark all pages as free
670 : */
671 : memset(sm_page_tbl, 0, sm_page_cnt*sizeof(ae_int_t));
672 : }
673 :
674 : void* ae_static_malloc(size_t size, size_t alignment)
675 : {
676 : int rq_pages, i, j, cur_len;
677 :
678 : AE_CRITICAL_ASSERT(size>=0);
679 : AE_CRITICAL_ASSERT(sm_page_size>0);
680 : AE_CRITICAL_ASSERT(sm_page_cnt>0);
681 : AE_CRITICAL_ASSERT(sm_page_tbl!=NULL);
682 : AE_CRITICAL_ASSERT(sm_mem!=NULL);
683 :
684 : if( size==0 )
685 : return NULL;
686 : if( _force_malloc_failure )
687 : return NULL;
688 :
689 : /* check that page alignment and requested alignment match each other */
690 : AE_CRITICAL_ASSERT(alignment<=sm_page_size);
691 : AE_CRITICAL_ASSERT((sm_page_size%alignment)==0);
692 :
693 : /* search long enough sequence of pages */
694 : rq_pages = size/sm_page_size;
695 : if( size%sm_page_size )
696 : rq_pages++;
697 : cur_len = 0;
698 : for(i=0; i<sm_page_cnt;)
699 : {
700 : /* determine length of the sequence of free pages */
701 : if( sm_page_tbl[i]==0 )
702 : cur_len++;
703 : else
704 : {
705 : AE_CRITICAL_ASSERT(sm_page_tbl[i]>0);
706 : cur_len=0;
707 : i += sm_page_tbl[i];
708 : continue;
709 : }
710 :
711 : /* found it? */
712 : if( cur_len>=rq_pages )
713 : {
714 : /* update counters (if flag is set) */
715 : if( _use_alloc_counter )
716 : {
717 : ae_optional_atomic_add_i(&_alloc_counter, 1);
718 : ae_optional_atomic_add_i(&_alloc_counter_total, 1);
719 : }
720 : if( _use_dbg_counters )
721 : ae_optional_atomic_add_i(&_dbg_alloc_total, size);
722 :
723 : /* mark pages and return */
724 : for(j=0; j<rq_pages; j++)
725 : sm_page_tbl[i-j] = -1;
726 : sm_page_tbl[i-(rq_pages-1)] = rq_pages;
727 : return sm_mem+(i-(rq_pages-1))*sm_page_size;
728 : }
729 :
730 : /* next element */
731 : i++;
732 : }
733 : return NULL;
734 : }
735 :
736 : void ae_static_free(void *block)
737 : {
738 : ae_int_t page_idx, page_cnt, i;
739 : if( block==NULL )
740 : return;
741 : page_idx = (unsigned char*)block-sm_mem;
742 : AE_CRITICAL_ASSERT(page_idx>=0);
743 : AE_CRITICAL_ASSERT((page_idx%sm_page_size)==0);
744 : page_idx = page_idx/sm_page_size;
745 : AE_CRITICAL_ASSERT(page_idx<sm_page_cnt);
746 : page_cnt = sm_page_tbl[page_idx];
747 : AE_CRITICAL_ASSERT(page_cnt>=1);
748 : for(i=0; i<page_cnt; i++)
749 : sm_page_tbl[page_idx+i] = 0;
750 :
751 : /* update counters (if flag is set) */
752 : if( _use_alloc_counter )
753 : ae_optional_atomic_sub_i(&_alloc_counter, 1);
754 : }
755 :
756 : void memory_pool_stats(ae_int_t *bytes_used, ae_int_t *bytes_free)
757 : {
758 : int i;
759 :
760 : AE_CRITICAL_ASSERT(sm_page_size>0);
761 : AE_CRITICAL_ASSERT(sm_page_cnt>0);
762 : AE_CRITICAL_ASSERT(sm_page_tbl!=NULL);
763 : AE_CRITICAL_ASSERT(sm_mem!=NULL);
764 :
765 : /* scan page table */
766 : *bytes_used = 0;
767 : *bytes_free = 0;
768 : for(i=0; i<sm_page_cnt;)
769 : {
770 : if( sm_page_tbl[i]==0 )
771 : {
772 : (*bytes_free)++;
773 : i++;
774 : }
775 : else
776 : {
777 : AE_CRITICAL_ASSERT(sm_page_tbl[i]>0);
778 : *bytes_used += sm_page_tbl[i];
779 : i += sm_page_tbl[i];
780 : }
781 : }
782 : *bytes_used *= sm_page_size;
783 : *bytes_free *= sm_page_size;
784 : }
785 : #endif
786 :
787 0 : void* aligned_malloc(size_t size, size_t alignment)
788 : {
789 : #if AE_MALLOC==AE_BASIC_STATIC_MALLOC
790 : return ae_static_malloc(size, alignment);
791 : #else
792 0 : char *result = NULL;
793 :
794 0 : if( size==0 )
795 0 : return NULL;
796 0 : if( _force_malloc_failure )
797 0 : return NULL;
798 0 : if( _malloc_failure_after>0 && _alloc_counter_total>=_malloc_failure_after )
799 0 : return NULL;
800 :
801 : /* allocate */
802 0 : if( alignment<=1 )
803 : {
804 : /* no alignment, just call alloc */
805 : void *block;
806 : void **p; ;
807 0 : block = malloc(sizeof(void*)+size);
808 0 : if( block==NULL )
809 0 : return NULL;
810 0 : p = (void**)block;
811 0 : *p = block;
812 0 : result = (char*)((char*)block+sizeof(void*));
813 : }
814 : else
815 : {
816 : /* align */
817 : void *block;
818 0 : block = malloc(alignment-1+sizeof(void*)+size);
819 0 : if( block==NULL )
820 0 : return NULL;
821 0 : result = (char*)block+sizeof(void*);
822 : /*if( (result-(char*)0)%alignment!=0 )
823 : result += alignment - (result-(char*)0)%alignment;*/
824 0 : result = (char*)ae_align(result, alignment);
825 0 : *((void**)(result-sizeof(void*))) = block;
826 : }
827 :
828 : /* update counters (if flag is set) */
829 0 : if( _use_alloc_counter )
830 : {
831 0 : ae_optional_atomic_add_i(&_alloc_counter, 1);
832 0 : ae_optional_atomic_add_i(&_alloc_counter_total, 1);
833 : }
834 0 : if( _use_dbg_counters )
835 0 : ae_optional_atomic_add_i(&_dbg_alloc_total, (ae_int64_t)size);
836 :
837 : /* return */
838 0 : return (void*)result;
839 : #endif
840 : }
841 :
/*************************************************************************
Returns the pointer originally obtained from malloc() for a block returned
by aligned_malloc(). The pointer is read from the sizeof(void*) bytes
located right before block.

Returns NULL for block==NULL. With AE_BASIC_STATIC_MALLOC it always
returns NULL.
*************************************************************************/
void* aligned_extract_ptr(void *block)
{
#if AE_MALLOC==AE_BASIC_STATIC_MALLOC
    return NULL;
#else
    void **slot;
    if( block==NULL )
        return NULL;
    slot = (void**)((char*)block-sizeof(void*));
    return *slot;
#endif
}
852 :
853 0 : void aligned_free(void *block)
854 : {
855 : #if AE_MALLOC==AE_BASIC_STATIC_MALLOC
856 : ae_static_free(block);
857 : #else
858 : void *p;
859 0 : if( block==NULL )
860 0 : return;
861 0 : p = aligned_extract_ptr(block);
862 0 : free(p);
863 0 : if( _use_alloc_counter )
864 0 : ae_optional_atomic_sub_i(&_alloc_counter, 1);
865 : #endif
866 : }
867 :
868 0 : void* eternal_malloc(size_t size)
869 : {
870 0 : if( size==0 )
871 0 : return NULL;
872 0 : if( _force_malloc_failure )
873 0 : return NULL;
874 0 : return malloc(size);
875 : }
876 :
877 : /************************************************************************
878 : Allocate memory with automatic alignment.
879 :
880 : Returns NULL when zero size is specified.
881 :
882 : Error handling:
883 : * if state is NULL, returns NULL on allocation error
884 : * if state is not NULL, calls ae_break() on allocation error
885 : ************************************************************************/
886 0 : void* ae_malloc(size_t size, ae_state *state)
887 : {
888 : void *result;
889 0 : if( size==0 )
890 0 : return NULL;
891 0 : result = aligned_malloc(size,AE_DATA_ALIGN);
892 0 : if( result==NULL && state!=NULL)
893 0 : ae_break(state, ERR_OUT_OF_MEMORY, "ae_malloc(): out of memory");
894 0 : return result;
895 : }
896 :
897 0 : void ae_free(void *p)
898 : {
899 0 : if( p!=NULL )
900 0 : aligned_free(p);
901 0 : }
902 :
903 : /************************************************************************
904 : Sets pointers to the matrix rows.
905 :
906 : * dst must be correctly initialized matrix
907 : * dst->data.ptr points to the beginning of memory block allocated for
908 : row pointers.
909 : * dst->ptr - undefined (initialized during algorithm processing)
910 : * storage parameter points to the beginning of actual storage
911 : ************************************************************************/
912 0 : void ae_matrix_update_row_pointers(ae_matrix *dst, void *storage)
913 : {
914 : char *p_base;
915 : void **pp_ptr;
916 : ae_int_t i;
917 0 : if( dst->rows>0 && dst->cols>0 )
918 : {
919 0 : p_base = (char*)storage;
920 0 : pp_ptr = (void**)dst->data.ptr;
921 0 : dst->ptr.pp_void = pp_ptr;
922 0 : for(i=0; i<dst->rows; i++, p_base+=dst->stride*ae_sizeof(dst->datatype))
923 0 : pp_ptr[i] = p_base;
924 : }
925 : else
926 0 : dst->ptr.pp_void = NULL;
927 0 : }
928 :
929 : /************************************************************************
930 : Returns size of datatype.
931 : Zero for dynamic types like strings or multiple precision types.
932 : ************************************************************************/
933 0 : ae_int_t ae_sizeof(ae_datatype datatype)
934 : {
935 0 : switch(datatype)
936 : {
937 0 : case DT_BOOL: return (ae_int_t)sizeof(ae_bool);
938 0 : case DT_INT: return (ae_int_t)sizeof(ae_int_t);
939 0 : case DT_REAL: return (ae_int_t)sizeof(double);
940 0 : case DT_COMPLEX: return 2*(ae_int_t)sizeof(double);
941 0 : default: return 0;
942 : }
943 : }
944 :
945 : /************************************************************************
946 : Checks that n bytes pointed by ptr are zero.
947 :
948 : This function is used in the constructors to check that instance fields
949 : on entry are correctly initialized by zeros.
950 : ************************************************************************/
951 0 : ae_bool ae_check_zeros(const void *ptr, ae_int_t n)
952 : {
953 : ae_int_t nu, nr, i;
954 0 : unsigned long long c = 0x0;
955 :
956 : /*
957 : * determine leading and trailing lengths
958 : */
959 0 : nu = n/sizeof(unsigned long long);
960 0 : nr = n%sizeof(unsigned long long);
961 :
962 : /*
963 : * handle leading nu long long elements
964 : */
965 0 : if( nu>0 )
966 : {
967 : const unsigned long long *p_ull;
968 0 : p_ull = (const unsigned long long *)ptr;
969 0 : for(i=0; i<nu; i++)
970 0 : c |= p_ull[i];
971 : }
972 :
973 : /*
974 : * handle trailing nr char elements
975 : */
976 0 : if( nr>0 )
977 : {
978 : const unsigned char *p_uc;
979 0 : p_uc = ((const unsigned char *)ptr)+nu*sizeof(unsigned long long);
980 0 : for(i=0; i<nr; i++)
981 0 : c |= p_uc[i];
982 : }
983 :
984 : /*
985 : * done
986 : */
987 0 : return c==0x0;
988 : }
989 :
990 :
991 : /************************************************************************
992 : This dummy function is used to prevent compiler messages about unused
993 : locals in automatically generated code.
994 :
995 : It makes nothing - just accepts pointer, "touches" it - and that is all.
996 : It performs several tricky operations without side effects which confuse
997 : compiler so it does not complain about unused locals in THIS function.
998 : ************************************************************************/
void ae_touch_ptr(void *p)
{
    /* shuffle the pointer between two volatile locals; nothing else is done with it */
    void * volatile sink_a = p;
    void * volatile sink_b = sink_a;
    sink_a = sink_b;
}
1005 :
1006 : /************************************************************************
1007 : This function initializes ALGLIB environment state.
1008 :
1009 : NOTES:
1010 : * stacks contain no frames, so ae_frame_make() must be called before
1011 : attaching dynamic blocks. Without it ae_frame_leave() will cycle
1012 : forever (which is intended behavior).
1013 : ************************************************************************/
1014 24 : void ae_state_init(ae_state *state)
1015 : {
1016 : ae_int32_t *vp;
1017 :
1018 : /*
1019 : * Set flags
1020 : */
1021 24 : state->flags = 0x0;
1022 :
1023 : /*
1024 : * p_next points to itself because:
1025 : * * correct program should be able to detect end of the list
1026 : * by looking at the ptr field.
1027 : * * NULL p_next may be used to distinguish automatic blocks
1028 : * (in the list) from non-automatic (not in the list)
1029 : */
1030 24 : state->last_block.p_next = &(state->last_block);
1031 24 : state->last_block.deallocator = NULL;
1032 24 : state->last_block.ptr = DYN_BOTTOM;
1033 24 : state->p_top_block = &(state->last_block);
1034 24 : state->break_jump = NULL;
1035 24 : state->error_msg = "";
1036 :
1037 : /*
1038 : * determine endianness and initialize precomputed IEEE special quantities.
1039 : */
1040 24 : state->endianness = ae_get_endianness();
1041 24 : if( state->endianness==AE_LITTLE_ENDIAN )
1042 : {
1043 24 : vp = (ae_int32_t*)(&state->v_nan);
1044 24 : vp[0] = 0;
1045 24 : vp[1] = (ae_int32_t)0x7FF80000;
1046 24 : vp = (ae_int32_t*)(&state->v_posinf);
1047 24 : vp[0] = 0;
1048 24 : vp[1] = (ae_int32_t)0x7FF00000;
1049 24 : vp = (ae_int32_t*)(&state->v_neginf);
1050 24 : vp[0] = 0;
1051 24 : vp[1] = (ae_int32_t)0xFFF00000;
1052 : }
1053 0 : else if( state->endianness==AE_BIG_ENDIAN )
1054 : {
1055 0 : vp = (ae_int32_t*)(&state->v_nan);
1056 0 : vp[1] = 0;
1057 0 : vp[0] = (ae_int32_t)0x7FF80000;
1058 0 : vp = (ae_int32_t*)(&state->v_posinf);
1059 0 : vp[1] = 0;
1060 0 : vp[0] = (ae_int32_t)0x7FF00000;
1061 0 : vp = (ae_int32_t*)(&state->v_neginf);
1062 0 : vp[1] = 0;
1063 0 : vp[0] = (ae_int32_t)0xFFF00000;
1064 : }
1065 : else
1066 0 : abort();
1067 :
1068 : /*
1069 : * set threading information
1070 : */
1071 24 : state->worker_thread = NULL;
1072 24 : state->parent_task = NULL;
1073 24 : state->thread_exception_handler = NULL;
1074 24 : }
1075 :
1076 :
1077 : /************************************************************************
1078 : This function clears ALGLIB environment state.
1079 : All dynamic data controlled by state are freed.
1080 : ************************************************************************/
1081 24 : void ae_state_clear(ae_state *state)
1082 : {
1083 24 : while( state->p_top_block->ptr!=DYN_BOTTOM )
1084 0 : ae_frame_leave(state);
1085 24 : }
1086 :
1087 :
1088 : /************************************************************************
1089 : This function sets jump buffer for error handling.
1090 :
1091 : buf may be NULL.
1092 : ************************************************************************/
1093 0 : void ae_state_set_break_jump(ae_state *state, jmp_buf *buf)
1094 : {
1095 0 : state->break_jump = buf;
1096 0 : }
1097 :
1098 :
1099 : /************************************************************************
1100 : This function sets flags member of the ae_state structure
1101 :
1102 : flags           new value of the state->flags field.
1103 : ************************************************************************/
1104 0 : void ae_state_set_flags(ae_state *state, ae_uint64_t flags)
1105 : {
1106 0 : state->flags = flags;
1107 0 : }
1108 :
1109 :
1110 : /************************************************************************
1111 : This function makes new stack frame.
1112 :
1113 : This function takes two parameters: environment state and pointer to the
1114 : dynamic block which will be used as indicator of the frame beginning.
1115 : This dynamic block must be initialized by caller and mustn't be changed/
1116 : deallocated/reused until ae_frame_leave() is called. It may be global or local
1117 : variable (local is even better).
1118 : ************************************************************************/
1119 0 : void ae_frame_make(ae_state *state, ae_frame *tmp)
1120 : {
1121 0 : tmp->db_marker.p_next = state->p_top_block;
1122 0 : tmp->db_marker.deallocator = NULL;
1123 0 : tmp->db_marker.ptr = DYN_FRAME;
1124 0 : state->p_top_block = &tmp->db_marker;
1125 0 : }
1126 :
1127 :
1128 : /************************************************************************
1129 : This function leaves current stack frame and deallocates all automatic
1130 : dynamic blocks which were attached to this frame.
1131 : ************************************************************************/
1132 0 : void ae_frame_leave(ae_state *state)
1133 : {
1134 0 : while( state->p_top_block->ptr!=DYN_FRAME && state->p_top_block->ptr!=DYN_BOTTOM)
1135 : {
1136 0 : if( state->p_top_block->ptr!=NULL && state->p_top_block->deallocator!=NULL)
1137 0 : ((ae_deallocator)(state->p_top_block->deallocator))(state->p_top_block->ptr);
1138 0 : state->p_top_block = state->p_top_block->p_next;
1139 : }
1140 0 : state->p_top_block = state->p_top_block->p_next;
1141 0 : }
1142 :
1143 :
1144 : /************************************************************************
1145 : This function attaches block to the dynamic block list
1146 :
1147 : block block
1148 : state ALGLIB environment state
1149 :
1150 : This function does NOT generate exceptions.
1151 :
1152 : NOTES:
1153 : * never call it for special blocks which marks frame boundaries!
1154 : ************************************************************************/
1155 0 : void ae_db_attach(ae_dyn_block *block, ae_state *state)
1156 : {
1157 0 : block->p_next = state->p_top_block;
1158 0 : state->p_top_block = block;
1159 0 : }
1160 :
1161 :
1162 : /************************************************************************
1163 : This function initializes dynamic block:
1164 :
1165 : block destination block, MUST be zero-filled on entry
1166 : size size (in bytes), >=0.
1167 : state ALGLIB environment state, non-NULL
1168 : make_automatic if true, vector is added to the dynamic block list
1169 :
1170 : block is assumed to be uninitialized, its fields are ignored. You may
1171 : call this function with zero size in order to register block in the
1172 : dynamic list.
1173 :
1174 : Error handling: calls ae_break() on allocation error. Block is left in
1175 : valid state (empty, but valid).
1176 :
1177 : NOTES:
1178 : * never call it for blocks which are already in the list; use ae_db_realloc
1179 : for already allocated blocks.
1180 :
1181 : NOTE: no memory allocation is performed for initialization with size=0
1182 : ************************************************************************/
1183 0 : void ae_db_init(ae_dyn_block *block, ae_int_t size, ae_state *state, ae_bool make_automatic)
1184 : {
1185 0 : AE_CRITICAL_ASSERT(state!=NULL);
1186 0 : AE_CRITICAL_ASSERT(ae_check_zeros(block,sizeof(*block)));
1187 :
1188 : /*
1189 : * NOTE: these strange dances around block->ptr are necessary
1190 : * in order to correctly handle possible exceptions during
1191 : * memory allocation.
1192 : */
1193 0 : ae_assert(size>=0, "ae_db_init(): negative size", state);
1194 0 : block->ptr = NULL;
1195 0 : block->valgrind_hint = NULL;
1196 0 : ae_touch_ptr(block->ptr);
1197 0 : ae_touch_ptr(block->valgrind_hint);
1198 0 : if( make_automatic )
1199 0 : ae_db_attach(block, state);
1200 : else
1201 0 : block->p_next = NULL;
1202 0 : if( size!=0 )
1203 : {
1204 0 : block->ptr = ae_malloc((size_t)size, state);
1205 0 : block->valgrind_hint = aligned_extract_ptr(block->ptr);
1206 : }
1207 0 : block->deallocator = ae_free;
1208 0 : }
1209 :
1210 :
1211 : /************************************************************************
1212 : This function realloc's dynamic block:
1213 :
1214 : block destination block (initialized)
1215 : size new size (in bytes)
1216 : state ALGLIB environment state
1217 :
1218 : block is assumed to be initialized.
1219 :
1220 : This function:
1221 : * deletes old contents
1222 : * preserves automatic state
1223 :
1224 : Error handling: calls ae_break() on allocation error. Block is left in
1225 : valid state - empty, but valid.
1226 :
1227 : NOTES:
1228 : * never call it for special blocks which mark frame boundaries!
1229 : ************************************************************************/
1230 0 : void ae_db_realloc(ae_dyn_block *block, ae_int_t size, ae_state *state)
1231 : {
1232 0 : AE_CRITICAL_ASSERT(state!=NULL);
1233 :
1234 : /*
1235 : * NOTE: these strange dances around block->ptr are necessary
1236 : * in order to correctly handle possible exceptions during
1237 : * memory allocation.
1238 : */
1239 0 : ae_assert(size>=0, "ae_db_realloc(): negative size", state);
1240 0 : if( block->ptr!=NULL )
1241 : {
1242 0 : ((ae_deallocator)block->deallocator)(block->ptr);
1243 0 : block->ptr = NULL;
1244 0 : block->valgrind_hint = NULL;
1245 : }
1246 0 : block->ptr = ae_malloc((size_t)size, state);
1247 0 : block->valgrind_hint = aligned_extract_ptr(block->ptr);
1248 0 : block->deallocator = ae_free;
1249 0 : }
1250 :
1251 :
1252 : /************************************************************************
1253 : This function clears dynamic block (releases all dynamically allocated
1254 : memory). Dynamic block may be in automatic management list - in this case
1255 : it will NOT be removed from list.
1256 :
1257 : block destination block (initialized)
1258 :
1259 : NOTES:
1260 : * never call it for special blocks which marks frame boundaries!
1261 : ************************************************************************/
1262 0 : void ae_db_free(ae_dyn_block *block)
1263 : {
1264 0 : if( block->ptr!=NULL )
1265 0 : ((ae_deallocator)block->deallocator)(block->ptr);
1266 0 : block->ptr = NULL;
1267 0 : block->valgrind_hint = NULL;
1268 0 : block->deallocator = ae_free;
1269 0 : }
1270 :
1271 : /************************************************************************
1272 : This function swaps contents of two dynamic blocks (pointers and
1273 : deallocators) leaving other parameters (automatic management settings,
1274 : etc.) unchanged.
1275 :
1276 : NOTES:
1277 : * never call it for special blocks which marks frame boundaries!
1278 : ************************************************************************/
1279 0 : void ae_db_swap(ae_dyn_block *block1, ae_dyn_block *block2)
1280 : {
1281 0 : void (*deallocator)(void*) = NULL;
1282 : void * volatile ptr;
1283 : void * valgrind_hint;
1284 :
1285 0 : ptr = block1->ptr;
1286 0 : valgrind_hint = block1->valgrind_hint;
1287 0 : deallocator = block1->deallocator;
1288 :
1289 0 : block1->ptr = block2->ptr;
1290 0 : block1->valgrind_hint = block2->valgrind_hint;
1291 0 : block1->deallocator = block2->deallocator;
1292 :
1293 0 : block2->ptr = ptr;
1294 0 : block2->valgrind_hint = valgrind_hint;
1295 0 : block2->deallocator = deallocator;
1296 0 : }
1297 :
1298 : /*************************************************************************
1299 : This function creates ae_vector.
1300 : Vector size may be zero. Vector contents is uninitialized.
1301 :
1302 : dst destination vector, MUST be zero-filled (we check it
1303 : and call abort() if *dst is non-zero; the rationale is
1304 : that we can not correctly handle errors in constructors
1305 : without zero-filling).
1306 : size vector size, may be zero
1307 : datatype guess what...
1308 : state pointer to current state structure. Can not be NULL.
1309 : used for exception handling (say, allocation error results
1310 : in longjmp call).
1311 : make_automatic if true, vector will be registered in the current frame
1312 : of the state structure;
1313 :
1314 : NOTE: no memory allocation is performed for initialization with size=0
1315 : *************************************************************************/
1316 0 : void ae_vector_init(ae_vector *dst, ae_int_t size, ae_datatype datatype, ae_state *state, ae_bool make_automatic)
1317 : {
1318 : /*
1319 : * Integrity checks
1320 : */
1321 0 : AE_CRITICAL_ASSERT(state!=NULL);
1322 0 : AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
1323 0 : ae_assert(size>=0, "ae_vector_init(): negative size", state);
1324 :
1325 : /* prepare for possible errors during allocation */
1326 0 : dst->cnt = 0;
1327 0 : dst->ptr.p_ptr = NULL;
1328 :
1329 : /* init */
1330 0 : ae_db_init(&dst->data, size*ae_sizeof(datatype), state, make_automatic);
1331 0 : dst->cnt = size;
1332 0 : dst->datatype = datatype;
1333 0 : dst->ptr.p_ptr = dst->data.ptr;
1334 0 : dst->is_attached = ae_false;
1335 0 : }
1336 :
1337 :
1338 : /************************************************************************
1339 : This function creates copy of ae_vector. New copy of the data is created,
1340 : which is managed and owned by newly initialized vector.
1341 :
1342 : dst destination vector, MUST be zero-filled (we check it
1343 : and call abort() if *dst is non-zero; the rationale is
1344 : that we can not correctly handle errors in constructors
1345 : without zero-filling).
1346 : src well, it is source
1347 : state pointer to current state structure. Can not be NULL.
1348 : used for exception handling (say, allocation error results
1349 : in longjmp call).
1350 : make_automatic if true, vector will be registered in the current frame
1351 : of the state structure;
1352 :
1353 : dst is assumed to be uninitialized, its fields are ignored.
1354 : ************************************************************************/
1355 0 : void ae_vector_init_copy(ae_vector *dst, ae_vector *src, ae_state *state, ae_bool make_automatic)
1356 : {
1357 0 : AE_CRITICAL_ASSERT(state!=NULL);
1358 :
1359 0 : ae_vector_init(dst, src->cnt, src->datatype, state, make_automatic);
1360 0 : if( src->cnt!=0 )
1361 0 : memmove(dst->ptr.p_ptr, src->ptr.p_ptr, (size_t)(src->cnt*ae_sizeof(src->datatype)));
1362 0 : }
1363 :
1364 : /************************************************************************
1365 : This function initializes ae_vector using X-structure as source. New copy
1366 : of data is created, which is owned/managed by ae_vector structure. Both
1367 : structures (source and destination) remain completely independent after
1368 : this call.
1369 :
1370 : dst destination vector, MUST be zero-filled (we check it
1371 : and call abort() if *dst is non-zero; the rationale is
1372 : that we can not correctly handle errors in constructors
1373 : without zero-filling).
1374 : src well, it is source
1375 : state pointer to current state structure. Can not be NULL.
1376 : used for exception handling (say, allocation error results
1377 : in longjmp call).
1378 : make_automatic if true, vector will be registered in the current frame
1379 : of the state structure;
1380 :
1381 : dst is assumed to be uninitialized, its fields are ignored.
1382 : ************************************************************************/
1383 0 : void ae_vector_init_from_x(ae_vector *dst, x_vector *src, ae_state *state, ae_bool make_automatic)
1384 : {
1385 0 : AE_CRITICAL_ASSERT(state!=NULL);
1386 :
1387 0 : ae_vector_init(dst, (ae_int_t)src->cnt, (ae_datatype)src->datatype, state, make_automatic);
1388 0 : if( src->cnt>0 )
1389 0 : memmove(dst->ptr.p_ptr, src->x_ptr.p_ptr, (size_t)(((ae_int_t)src->cnt)*ae_sizeof((ae_datatype)src->datatype)));
1390 0 : }
1391 :
1392 : /************************************************************************
1393 : This function initializes ae_vector using X-structure as source.
1394 :
1395 : New vector is attached to source:
1396 : * DST shares memory with SRC
1397 : * both DST and SRC are writable - all writes to DST change elements of
1398 : SRC and vice versa.
1399 : * DST can be reallocated with ae_vector_set_length(), in this case SRC
1400 : remains untouched
1401 : * SRC, however, CAN NOT BE REALLOCATED AS LONG AS DST EXISTS
1402 :
1403 : NOTE: is_attached field is set to ae_true in order to indicate that
1404 : vector does not own its memory.
1405 :
1406 : dst destination vector
1407 : src well, it is source
1408 : state pointer to current state structure. Can not be NULL.
1409 : used for exception handling (say, allocation error results
1410 : in longjmp call).
1411 : make_automatic if true, vector will be registered in the current frame
1412 : of the state structure;
1413 :
1414 : dst is assumed to be uninitialized, its fields are ignored.
1415 : ************************************************************************/
1416 0 : void ae_vector_init_attach_to_x(ae_vector *dst, x_vector *src, ae_state *state, ae_bool make_automatic)
1417 : {
1418 : volatile ae_int_t cnt;
1419 :
1420 0 : AE_CRITICAL_ASSERT(state!=NULL);
1421 0 : AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
1422 :
1423 0 : cnt = (ae_int_t)src->cnt;
1424 :
1425 : /* ensure that size is correct */
1426 0 : ae_assert(cnt==src->cnt, "ae_vector_init_attach_to_x(): 32/64 overflow", state);
1427 0 : ae_assert(cnt>=0, "ae_vector_init_attach_to_x(): negative length", state);
1428 :
1429 : /* prepare for possible errors during allocation */
1430 0 : dst->cnt = 0;
1431 0 : dst->ptr.p_ptr = NULL;
1432 0 : dst->datatype = (ae_datatype)src->datatype;
1433 :
1434 : /* zero-size init in order to correctly register in the frame */
1435 0 : ae_db_init(&dst->data, 0, state, make_automatic);
1436 :
1437 : /* init */
1438 0 : dst->cnt = cnt;
1439 0 : dst->ptr.p_ptr = src->x_ptr.p_ptr;
1440 0 : dst->is_attached = ae_true;
1441 0 : }
1442 :
1443 : /************************************************************************
1444 : This function changes length of ae_vector.
1445 :
1446 : dst destination vector
1447 : newsize vector size, may be zero
1448 : state ALGLIB environment state, can not be NULL
1449 :
1450 : Error handling: calls ae_break() on allocation error
1451 :
1452 : NOTES:
1453 : * vector must be initialized
1454 : * all contents is destroyed during setlength() call
1455 : * new size may be zero.
1456 : ************************************************************************/
1457 0 : void ae_vector_set_length(ae_vector *dst, ae_int_t newsize, ae_state *state)
1458 : {
1459 0 : AE_CRITICAL_ASSERT(state!=NULL);
1460 0 : ae_assert(newsize>=0, "ae_vector_set_length(): negative size", state);
1461 0 : if( dst->cnt==newsize )
1462 0 : return;
1463 :
1464 : /* realloc, being ready for exception during reallocation (cnt=ptr=0 on entry) */
1465 0 : dst->cnt = 0;
1466 0 : dst->ptr.p_ptr = NULL;
1467 0 : ae_db_realloc(&dst->data, newsize*ae_sizeof(dst->datatype), state);
1468 0 : dst->cnt = newsize;
1469 0 : dst->ptr.p_ptr = dst->data.ptr;
1470 : }
1471 :
1472 : /************************************************************************
1473 : This function resizes ae_vector, preserving previously existing elements.
1474 : Values of elements added during vector growth are undefined.
1475 :
1476 : dst destination vector
1477 : newsize vector size, may be zero
1478 : state ALGLIB environment state, can not be NULL
1479 :
1480 : Error handling: calls ae_break() on allocation error
1481 :
1482 : NOTES:
1483 : * vector must be initialized
1484 : * new size may be zero.
1485 : ************************************************************************/
1486 0 : void ae_vector_resize(ae_vector *dst, ae_int_t newsize, ae_state *state)
1487 : {
1488 : ae_vector tmp;
1489 : ae_int_t bytes_total;
1490 :
1491 0 : memset(&tmp, 0, sizeof(tmp));
1492 0 : ae_vector_init(&tmp, newsize, dst->datatype, state, ae_false);
1493 0 : bytes_total = (dst->cnt<newsize ? dst->cnt : newsize)*ae_sizeof(dst->datatype);
1494 0 : if( bytes_total>0 )
1495 0 : memmove(tmp.ptr.p_ptr, dst->ptr.p_ptr, bytes_total);
1496 0 : ae_swap_vectors(dst, &tmp);
1497 0 : ae_vector_clear(&tmp);
1498 0 : }
1499 :
1500 :
1501 : /************************************************************************
1502 : This function provides "CLEAR" functionality for vector (contents is
1503 : cleared, but structure still left in valid state).
1504 :
1505 : The function clears vector contents (releases all dynamically allocated
1506 : memory). Vector may be in automatic management list - in this case it
1507 : will NOT be removed from list.
1508 :
1509 : IMPORTANT: this function does NOT invalidates dst; it just releases all
1510 : dynamically allocated storage, but dst still may be used after call to
1511 : ae_vector_set_length().
1512 :
1513 : dst destination vector
1514 : ************************************************************************/
1515 0 : void ae_vector_clear(ae_vector *dst)
1516 : {
1517 0 : dst->cnt = 0;
1518 0 : ae_db_free(&dst->data);
1519 0 : dst->ptr.p_ptr = 0;
1520 0 : dst->is_attached = ae_false;
1521 0 : }
1522 :
1523 :
1524 : /************************************************************************
1525 : This function provides "DESTROY" functionality for vector (contents is
1526 : cleared, all internal structures are destroyed). For vectors it is same
1527 : as CLEAR.
1528 :
1529 : dst destination vector
1530 : ************************************************************************/
1531 0 : void ae_vector_destroy(ae_vector *dst)
1532 : {
1533 0 : ae_vector_clear(dst);
1534 0 : }
1535 :
1536 :
1537 : /************************************************************************
1538 : This function efficiently swaps contents of two vectors, leaving other
1539 : parameters (automatic management, etc.) unchanged.
1540 : ************************************************************************/
1541 0 : void ae_swap_vectors(ae_vector *vec1, ae_vector *vec2)
1542 : {
1543 : ae_int_t cnt;
1544 : ae_datatype datatype;
1545 : void *p_ptr;
1546 :
1547 0 : ae_assert(!vec1->is_attached, "ALGLIB: internal error, attempt to swap vectors attached to X-object", NULL);
1548 0 : ae_assert(!vec2->is_attached, "ALGLIB: internal error, attempt to swap vectors attached to X-object", NULL);
1549 :
1550 0 : ae_db_swap(&vec1->data, &vec2->data);
1551 :
1552 0 : cnt = vec1->cnt;
1553 0 : datatype = vec1->datatype;
1554 0 : p_ptr = vec1->ptr.p_ptr;
1555 0 : vec1->cnt = vec2->cnt;
1556 0 : vec1->datatype = vec2->datatype;
1557 0 : vec1->ptr.p_ptr = vec2->ptr.p_ptr;
1558 0 : vec2->cnt = cnt;
1559 0 : vec2->datatype = datatype;
1560 0 : vec2->ptr.p_ptr = p_ptr;
1561 0 : }
1562 :
1563 : /************************************************************************
1564 : This function creates ae_matrix.
1565 :
1566 : Matrix size may be zero, in such cases both rows and cols are zero.
1567 : Matrix contents is uninitialized.
1568 :
1569 : dst destination matrix, must be zero-filled
1570 : rows rows count
1571 : cols cols count
1572 : datatype element type
1573 : state pointer to current state structure. Can not be NULL.
1574 : used for exception handling (say, allocation error results
1575 : in longjmp call).
1576 : make_automatic if true, matrix will be registered in the current frame
1577 : of the state structure;
1578 :
1579 : dst is assumed to be uninitialized, its fields are ignored.
1580 :
1581 : NOTE: no memory allocation is performed for initialization with rows=cols=0
1582 : ************************************************************************/
1583 0 : void ae_matrix_init(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_datatype datatype, ae_state *state, ae_bool make_automatic)
1584 : {
1585 0 : AE_CRITICAL_ASSERT(state!=NULL);
1586 0 : AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
1587 :
1588 0 : ae_assert(rows>=0 && cols>=0, "ae_matrix_init(): negative length", state);
1589 :
1590 : /* if one of rows/cols is zero, another MUST be too; perform quick exit */
1591 0 : if( rows==0 || cols==0 )
1592 : {
1593 0 : dst->rows = 0;
1594 0 : dst->cols = 0;
1595 0 : dst->is_attached = ae_false;
1596 0 : dst->ptr.pp_void = NULL;
1597 0 : dst->stride = 0;
1598 0 : dst->datatype = datatype;
1599 0 : ae_db_init(&dst->data, 0, state, make_automatic);
1600 0 : return;
1601 : }
1602 :
1603 : /* init, being ready for exception during allocation (rows=cols=ptr=NULL on entry) */
1604 0 : dst->is_attached = ae_false;
1605 0 : dst->rows = 0;
1606 0 : dst->cols = 0;
1607 0 : dst->ptr.pp_void = NULL;
1608 0 : dst->stride = cols;
1609 0 : while( dst->stride*ae_sizeof(datatype)%AE_DATA_ALIGN!=0 )
1610 0 : dst->stride++;
1611 0 : dst->datatype = datatype;
1612 0 : ae_db_init(&dst->data, rows*((ae_int_t)sizeof(void*)+dst->stride*ae_sizeof(datatype))+AE_DATA_ALIGN-1, state, make_automatic);
1613 0 : dst->rows = rows;
1614 0 : dst->cols = cols;
1615 0 : ae_matrix_update_row_pointers(dst, ae_align((char*)dst->data.ptr+rows*sizeof(void*),AE_DATA_ALIGN));
1616 : }
1617 :
1618 :
1619 : /************************************************************************
1620 : This function creates copy of ae_matrix. A new copy of the data is created.
1621 :
1622 : dst destination matrix, must be zero-filled
1623 : src well, it is source
1624 : state pointer to current state structure. Can not be NULL.
1625 : used for exception handling (say, allocation error results
1626 : in longjmp call).
1627 : make_automatic if true, matrix will be registered in the current frame
1628 : of the state structure;
1629 :
1630 : dst is assumed to be uninitialized, its fields are ignored.
1631 : ************************************************************************/
1632 0 : void ae_matrix_init_copy(ae_matrix *dst, ae_matrix *src, ae_state *state, ae_bool make_automatic)
1633 : {
1634 : ae_int_t i;
1635 0 : ae_matrix_init(dst, src->rows, src->cols, src->datatype, state, make_automatic);
1636 0 : if( src->rows!=0 && src->cols!=0 )
1637 : {
1638 0 : if( dst->stride==src->stride )
1639 0 : memmove(dst->ptr.pp_void[0], src->ptr.pp_void[0], (size_t)(src->rows*src->stride*ae_sizeof(src->datatype)));
1640 : else
1641 0 : for(i=0; i<dst->rows; i++)
1642 0 : memmove(dst->ptr.pp_void[i], src->ptr.pp_void[i], (size_t)(dst->cols*ae_sizeof(dst->datatype)));
1643 : }
1644 0 : }
1645 :
1646 :
1647 : /************************************************************************
1648 : This function initializes ae_matrix using X-structure as source. New copy
1649 : of data is created, which is owned/managed by ae_matrix structure. Both
1650 : structures (source and destination) remain completely independent after
1651 : this call.
1652 :
1653 : dst destination matrix, must be zero-filled
1654 : src well, it is source
1655 : state pointer to current state structure. Can not be NULL.
1656 : used for exception handling (say, allocation error results
1657 : in longjmp call).
1658 : make_automatic if true, matrix will be registered in the current frame
1659 : of the state structure;
1660 :
1661 : dst is assumed to be uninitialized, its fields are ignored.
1662 : ************************************************************************/
1663 0 : void ae_matrix_init_from_x(ae_matrix *dst, x_matrix *src, ae_state *state, ae_bool make_automatic)
1664 : {
1665 : char *p_src_row;
1666 : char *p_dst_row;
1667 : ae_int_t row_size;
1668 : ae_int_t i;
1669 0 : AE_CRITICAL_ASSERT(state!=NULL);
1670 0 : ae_matrix_init(dst, (ae_int_t)src->rows, (ae_int_t)src->cols, (ae_datatype)src->datatype, state, make_automatic);
1671 0 : if( src->rows!=0 && src->cols!=0 )
1672 : {
1673 0 : p_src_row = (char*)src->x_ptr.p_ptr;
1674 0 : p_dst_row = (char*)(dst->ptr.pp_void[0]);
1675 0 : row_size = ae_sizeof((ae_datatype)src->datatype)*(ae_int_t)src->cols;
1676 0 : for(i=0; i<src->rows; i++, p_src_row+=src->stride*ae_sizeof((ae_datatype)src->datatype), p_dst_row+=dst->stride*ae_sizeof((ae_datatype)src->datatype))
1677 0 : memmove(p_dst_row, p_src_row, (size_t)(row_size));
1678 : }
1679 0 : }
1680 :
1681 :
1682 : /************************************************************************
1683 : This function initializes ae_matrix using X-structure as source.
1684 :
1685 : New matrix is attached to source:
1686 : * DST shares memory with SRC
1687 : * both DST and SRC are writable - all writes to DST change elements of
1688 : SRC and vice versa.
1689 : * DST can be reallocated with ae_matrix_set_length(), in this case SRC
1690 : remains untouched
1691 : * SRC, however, CAN NOT BE REALLOCATED AS LONG AS DST EXISTS
1692 :
1693 : dst destination matrix, must be zero-filled
1694 : src well, it is source
1695 : state pointer to current state structure. Can not be NULL.
1696 : used for exception handling (say, allocation error results
1697 : in longjmp call).
1698 : make_automatic if true, matrix will be registered in the current frame
1699 : of the state structure;
1700 :
1701 : dst is assumed to be uninitialized, its fields are ignored.
1702 : ************************************************************************/
1703 0 : void ae_matrix_init_attach_to_x(ae_matrix *dst, x_matrix *src, ae_state *state, ae_bool make_automatic)
1704 : {
1705 : ae_int_t rows, cols;
1706 :
1707 0 : AE_CRITICAL_ASSERT(state!=NULL);
1708 0 : AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
1709 :
1710 0 : rows = (ae_int_t)src->rows;
1711 0 : cols = (ae_int_t)src->cols;
1712 :
1713 : /* check that X-source is densely packed */
1714 0 : ae_assert(src->cols==src->stride, "ae_matrix_init_attach_to_x(): unsupported stride", state);
1715 :
1716 : /* ensure that size is correct */
1717 0 : ae_assert(rows==src->rows, "ae_matrix_init_attach_to_x(): 32/64 overflow", state);
1718 0 : ae_assert(cols==src->cols, "ae_matrix_init_attach_to_x(): 32/64 overflow", state);
1719 0 : ae_assert(rows>=0 && cols>=0, "ae_matrix_init_attach_to_x(): negative length", state);
1720 :
1721 : /* if one of rows/cols is zero, another MUST be too */
1722 0 : if( rows==0 || cols==0 )
1723 : {
1724 0 : rows = 0;
1725 0 : cols = 0;
1726 : }
1727 :
1728 : /* init, being ready for allocation error */
1729 0 : dst->is_attached = ae_true;
1730 0 : dst->rows = 0;
1731 0 : dst->cols = 0;
1732 0 : dst->stride = cols;
1733 0 : dst->datatype = (ae_datatype)src->datatype;
1734 0 : dst->ptr.pp_void = NULL;
1735 0 : ae_db_init(&dst->data, rows*(ae_int_t)sizeof(void*), state, make_automatic);
1736 0 : dst->rows = rows;
1737 0 : dst->cols = cols;
1738 0 : if( dst->rows>0 && dst->cols>0 )
1739 : {
1740 : ae_int_t i, rowsize;
1741 : char *p_row;
1742 : void **pp_ptr;
1743 :
1744 0 : p_row = (char*)src->x_ptr.p_ptr;
1745 0 : rowsize = dst->stride*ae_sizeof(dst->datatype);
1746 0 : pp_ptr = (void**)dst->data.ptr;
1747 0 : dst->ptr.pp_void = pp_ptr;
1748 0 : for(i=0; i<dst->rows; i++, p_row+=rowsize)
1749 0 : pp_ptr[i] = p_row;
1750 : }
1751 0 : }
1752 :
1753 :
1754 : /************************************************************************
1755 : This function changes length of ae_matrix.
1756 :
1757 : dst destination matrix
1758 : rows size, may be zero
1759 : cols size, may be zero
1760 : state ALGLIB environment state
1761 :
1762 : Error handling:
1763 : * if state is NULL, returns ae_false on allocation error
1764 : * if state is not NULL, calls ae_break() on allocation error
1765 : * returns ae_true on success
1766 :
1767 : NOTES:
1768 : * matrix must be initialized
1769 : * all contents is destroyed during setlength() call
1770 : * new size may be zero.
1771 : ************************************************************************/
1772 0 : void ae_matrix_set_length(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_state *state)
1773 : {
1774 0 : AE_CRITICAL_ASSERT(state!=NULL);
1775 0 : ae_assert(rows>=0 && cols>=0, "ae_matrix_set_length(): negative length", state);
1776 0 : if( dst->rows==rows && dst->cols==cols )
1777 0 : return;
1778 :
1779 : /* prepare stride */
1780 0 : dst->stride = cols;
1781 0 : while( dst->stride*ae_sizeof(dst->datatype)%AE_DATA_ALIGN!=0 )
1782 0 : dst->stride++;
1783 :
1784 : /* realloc, being ready for an exception during reallocation (rows=cols=0 on entry) */
1785 0 : dst->rows = 0;
1786 0 : dst->cols = 0;
1787 0 : dst->ptr.pp_void = NULL;
1788 0 : ae_db_realloc(&dst->data, rows*((ae_int_t)sizeof(void*)+dst->stride*ae_sizeof(dst->datatype))+AE_DATA_ALIGN-1, state);
1789 0 : dst->rows = rows;
1790 0 : dst->cols = cols;
1791 :
1792 : /* update pointers to rows */
1793 0 : ae_matrix_update_row_pointers(dst, ae_align((char*)dst->data.ptr+dst->rows*sizeof(void*),AE_DATA_ALIGN));
1794 : }
1795 :
1796 :
1797 : /************************************************************************
1798 : This function provides "CLEAR" functionality for vector (contents is
1799 : cleared, but structure still left in valid state).
1800 :
1801 : The function clears matrix contents (releases all dynamically allocated
1802 : memory). Matrix may be in automatic management list - in this case it
1803 : will NOT be removed from list.
1804 :
1805 : IMPORTANT: this function does NOT invalidates dst; it just releases all
1806 : dynamically allocated storage, but dst still may be used after call to
1807 : ae_matrix_set_length().
1808 :
1809 : dst destination matrix
1810 : ************************************************************************/
1811 0 : void ae_matrix_clear(ae_matrix *dst)
1812 : {
1813 0 : dst->rows = 0;
1814 0 : dst->cols = 0;
1815 0 : dst->stride = 0;
1816 0 : ae_db_free(&dst->data);
1817 0 : dst->ptr.p_ptr = 0;
1818 0 : dst->is_attached = ae_false;
1819 0 : }
1820 :
1821 :
1822 : /************************************************************************
1823 : This function provides "DESTROY" functionality for matrix (contents is
1824 : cleared, but structure still left in valid state).
1825 :
1826 : For matrices it is same as CLEAR.
1827 :
1828 : dst destination matrix
1829 : ************************************************************************/
1830 0 : void ae_matrix_destroy(ae_matrix *dst)
1831 : {
1832 0 : ae_matrix_clear(dst);
1833 0 : }
1834 :
1835 :
1836 : /************************************************************************
1837 : This function efficiently swaps contents of two vectors, leaving other
1838 : pararemeters (automatic management, etc.) unchanged.
1839 : ************************************************************************/
1840 0 : void ae_swap_matrices(ae_matrix *mat1, ae_matrix *mat2)
1841 : {
1842 : ae_int_t rows;
1843 : ae_int_t cols;
1844 : ae_int_t stride;
1845 : ae_datatype datatype;
1846 : void *p_ptr;
1847 :
1848 0 : ae_assert(!mat1->is_attached, "ALGLIB: internal error, attempt to swap matrices attached to X-object", NULL);
1849 0 : ae_assert(!mat2->is_attached, "ALGLIB: internal error, attempt to swap matrices attached to X-object", NULL);
1850 :
1851 0 : ae_db_swap(&mat1->data, &mat2->data);
1852 :
1853 0 : rows = mat1->rows;
1854 0 : cols = mat1->cols;
1855 0 : stride = mat1->stride;
1856 0 : datatype = mat1->datatype;
1857 0 : p_ptr = mat1->ptr.p_ptr;
1858 :
1859 0 : mat1->rows = mat2->rows;
1860 0 : mat1->cols = mat2->cols;
1861 0 : mat1->stride = mat2->stride;
1862 0 : mat1->datatype = mat2->datatype;
1863 0 : mat1->ptr.p_ptr = mat2->ptr.p_ptr;
1864 :
1865 0 : mat2->rows = rows;
1866 0 : mat2->cols = cols;
1867 0 : mat2->stride = stride;
1868 0 : mat2->datatype = datatype;
1869 0 : mat2->ptr.p_ptr = p_ptr;
1870 0 : }
1871 :
1872 :
1873 : /************************************************************************
1874 : This function creates smart pointer structure.
1875 :
1876 : dst destination smart pointer, must be zero-filled
1877 : subscriber pointer to pointer which receives updates in the
1878 : internal object stored in ae_smart_ptr. Any update to
1879 : dst->ptr is translated to subscriber. Can be NULL.
1880 : state pointer to current state structure. Can not be NULL.
1881 : used for exception handling (say, allocation error results
1882 : in longjmp call).
1883 : make_automatic if true, pointer will be registered in the current frame
1884 : of the state structure;
1885 :
1886 : Error handling:
1887 : * on failure calls ae_break() with NULL state pointer. Usually it results
1888 : in abort() call.
1889 :
1890 : After initialization, smart pointer stores NULL pointer.
1891 : ************************************************************************/
1892 0 : void ae_smart_ptr_init(ae_smart_ptr *dst, void **subscriber, ae_state *state, ae_bool make_automatic)
1893 : {
1894 0 : AE_CRITICAL_ASSERT(state!=NULL);
1895 0 : AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
1896 0 : dst->subscriber = subscriber;
1897 0 : dst->ptr = NULL;
1898 0 : if( dst->subscriber!=NULL )
1899 0 : *(dst->subscriber) = dst->ptr;
1900 0 : dst->is_owner = ae_false;
1901 0 : dst->is_dynamic = ae_false;
1902 0 : dst->frame_entry.deallocator = ae_smart_ptr_destroy;
1903 0 : dst->frame_entry.ptr = dst;
1904 0 : if( make_automatic )
1905 0 : ae_db_attach(&dst->frame_entry, state);
1906 0 : }
1907 :
1908 :
1909 : /************************************************************************
1910 : This function clears smart pointer structure.
1911 :
1912 : dst destination smart pointer.
1913 :
1914 : After call to this function smart pointer contains NULL reference, which
1915 : is propagated to its subscriber (in cases non-NULL subscruber was
1916 : specified during pointer creation).
1917 : ************************************************************************/
1918 0 : void ae_smart_ptr_clear(void *_dst)
1919 : {
1920 0 : ae_smart_ptr *dst = (ae_smart_ptr*)_dst;
1921 0 : if( dst->is_owner && dst->ptr!=NULL )
1922 : {
1923 0 : dst->destroy(dst->ptr);
1924 0 : if( dst->is_dynamic )
1925 0 : ae_free(dst->ptr);
1926 : }
1927 0 : dst->is_owner = ae_false;
1928 0 : dst->is_dynamic = ae_false;
1929 0 : dst->ptr = NULL;
1930 0 : dst->destroy = NULL;
1931 0 : if( dst->subscriber!=NULL )
1932 0 : *(dst->subscriber) = NULL;
1933 0 : }
1934 :
1935 :
1936 : /************************************************************************
1937 : This function dstroys smart pointer structure (same as clearing it).
1938 :
1939 : dst destination smart pointer.
1940 : ************************************************************************/
1941 0 : void ae_smart_ptr_destroy(void *_dst)
1942 : {
1943 0 : ae_smart_ptr_clear(_dst);
1944 0 : }
1945 :
1946 :
1947 : /************************************************************************
1948 : This function assigns pointer to ae_smart_ptr structure.
1949 :
1950 : dst destination smart pointer.
1951 : new_ptr new pointer to assign
1952 : is_owner whether smart pointer owns new_ptr
1953 : is_dynamic whether object is dynamic - clearing such object
1954 : requires BOTH calling destructor function AND calling
1955 : ae_free() for memory occupied by object.
1956 : destroy destructor function
1957 :
1958 : In case smart pointer already contains non-NULL value and owns this value,
1959 : it is freed before assigning new pointer.
1960 :
1961 : Changes in pointer are propagated to its subscriber (in case non-NULL
1962 : subscriber was specified during pointer creation).
1963 :
1964 : You can specify NULL new_ptr, in which case is_owner/destroy are ignored.
1965 : ************************************************************************/
1966 0 : void ae_smart_ptr_assign(ae_smart_ptr *dst, void *new_ptr, ae_bool is_owner, ae_bool is_dynamic, void (*destroy)(void*))
1967 : {
1968 0 : if( dst->is_owner && dst->ptr!=NULL )
1969 : {
1970 0 : dst->destroy(dst->ptr);
1971 0 : if( dst->is_dynamic )
1972 0 : ae_free(dst->ptr);
1973 : }
1974 0 : if( new_ptr!=NULL )
1975 : {
1976 0 : dst->ptr = new_ptr;
1977 0 : dst->is_owner = is_owner;
1978 0 : dst->is_dynamic = is_dynamic;
1979 0 : dst->destroy = destroy;
1980 : }
1981 : else
1982 : {
1983 0 : dst->ptr = NULL;
1984 0 : dst->is_owner = ae_false;
1985 0 : dst->is_dynamic = ae_false;
1986 0 : dst->destroy = NULL;
1987 : }
1988 0 : if( dst->subscriber!=NULL )
1989 0 : *(dst->subscriber) = dst->ptr;
1990 0 : }
1991 :
1992 :
1993 : /************************************************************************
1994 : This function releases pointer owned by ae_smart_ptr structure:
1995 : * all internal fields are set to NULL
1996 : * destructor function for internal pointer is NOT called even when we own
1997 : this pointer. After this call ae_smart_ptr releases ownership of its
1998 : pointer and passes it to caller.
1999 : * changes in pointer are propagated to its subscriber (in case non-NULL
2000 : subscriber was specified during pointer creation).
2001 :
2002 : dst destination smart pointer.
2003 : ************************************************************************/
2004 0 : void ae_smart_ptr_release(ae_smart_ptr *dst)
2005 : {
2006 0 : dst->is_owner = ae_false;
2007 0 : dst->is_dynamic = ae_false;
2008 0 : dst->ptr = NULL;
2009 0 : dst->destroy = NULL;
2010 0 : if( dst->subscriber!=NULL )
2011 0 : *(dst->subscriber) = NULL;
2012 0 : }
2013 :
2014 : /************************************************************************
2015 : This function copies contents of ae_vector (SRC) to x_vector (DST).
2016 :
2017 : This function should not be called for DST which is attached to SRC
2018 : (opposite situation, when SRC is attached to DST, is possible).
2019 :
2020 : Depending on situation, following actions are performed
2021 : * for SRC attached to DST, this function performs no actions (no need to
2022 : do anything)
2023 : * for independent vectors of different sizes it allocates storage in DST
2024 : and copy contents of SRC to DST. DST->last_action field is set to
2025 : ACT_NEW_LOCATION, and DST->owner is set to OWN_AE.
2026 : * for independent vectors of same sizes it does not perform memory
2027 : (re)allocation. It just copies SRC to already existing place.
2028 : DST->last_action is set to ACT_SAME_LOCATION (unless it was
2029 : ACT_NEW_LOCATION), DST->owner is unmodified.
2030 :
2031 : dst destination vector
2032 : src source, vector in x-format
2033 : state ALGLIB environment state
2034 :
2035 : NOTES:
2036 : * dst is assumed to be initialized. Its contents is freed before copying
2037 : data from src (if size / type are different) or overwritten (if
2038 : possible given destination size).
2039 : ************************************************************************/
2040 0 : void ae_x_set_vector(x_vector *dst, ae_vector *src, ae_state *state)
2041 : {
2042 0 : if( src->ptr.p_ptr == dst->x_ptr.p_ptr )
2043 : {
2044 : /* src->ptr points to the beginning of dst, attached matrices, no need to copy */
2045 0 : return;
2046 : }
2047 0 : if( dst->cnt!=src->cnt || dst->datatype!=src->datatype )
2048 : {
2049 0 : if( dst->owner==OWN_AE )
2050 0 : ae_free(dst->x_ptr.p_ptr);
2051 0 : dst->x_ptr.p_ptr = ae_malloc((size_t)(src->cnt*ae_sizeof(src->datatype)), state);
2052 0 : if( src->cnt!=0 && dst->x_ptr.p_ptr==NULL )
2053 0 : ae_break(state, ERR_OUT_OF_MEMORY, "ae_malloc(): out of memory");
2054 0 : dst->last_action = ACT_NEW_LOCATION;
2055 0 : dst->cnt = src->cnt;
2056 0 : dst->datatype = src->datatype;
2057 0 : dst->owner = OWN_AE;
2058 : }
2059 : else
2060 : {
2061 0 : if( dst->last_action==ACT_UNCHANGED )
2062 0 : dst->last_action = ACT_SAME_LOCATION;
2063 0 : else if( dst->last_action==ACT_SAME_LOCATION )
2064 0 : dst->last_action = ACT_SAME_LOCATION;
2065 0 : else if( dst->last_action==ACT_NEW_LOCATION )
2066 0 : dst->last_action = ACT_NEW_LOCATION;
2067 : else
2068 0 : ae_assert(ae_false, "ALGLIB: internal error in ae_x_set_vector()", state);
2069 : }
2070 0 : if( src->cnt )
2071 0 : memmove(dst->x_ptr.p_ptr, src->ptr.p_ptr, (size_t)(src->cnt*ae_sizeof(src->datatype)));
2072 : }
2073 :
2074 : /************************************************************************
2075 : This function copies contents of ae_matrix to x_matrix.
2076 :
2077 : This function should not be called for DST which is attached to SRC
2078 : (opposite situation, when SRC is attached to DST, is possible).
2079 :
2080 : Depending on situation, following actions are performed
2081 : * for SRC attached to DST, this function performs no actions (no need to
2082 : do anything)
2083 : * for independent matrices of different sizes it allocates storage in DST
2084 : and copy contents of SRC to DST. DST->last_action field is set to
2085 : ACT_NEW_LOCATION, and DST->owner is set to OWN_AE.
2086 : * for independent matrices of same sizes it does not perform memory
2087 : (re)allocation. It just copies SRC to already existing place.
2088 : DST->last_action is set to ACT_SAME_LOCATION (unless it was
2089 : ACT_NEW_LOCATION), DST->owner is unmodified.
2090 :
2091 : dst destination vector
2092 : src source, matrix in x-format
2093 : state ALGLIB environment state
2094 :
2095 : NOTES:
2096 : * dst is assumed to be initialized. Its contents is freed before copying
2097 : data from src (if size / type are different) or overwritten (if
2098 : possible given destination size).
2099 : ************************************************************************/
2100 0 : void ae_x_set_matrix(x_matrix *dst, ae_matrix *src, ae_state *state)
2101 : {
2102 : char *p_src_row;
2103 : char *p_dst_row;
2104 : ae_int_t i;
2105 : ae_int_t row_size;
2106 0 : if( src->ptr.pp_void!=NULL && src->ptr.pp_void[0] == dst->x_ptr.p_ptr )
2107 : {
2108 : /* src->ptr points to the beginning of dst, attached matrices, no need to copy */
2109 0 : return;
2110 : }
2111 0 : if( dst->rows!=src->rows || dst->cols!=src->cols || dst->datatype!=src->datatype )
2112 : {
2113 0 : if( dst->owner==OWN_AE )
2114 0 : ae_free(dst->x_ptr.p_ptr);
2115 0 : dst->rows = src->rows;
2116 0 : dst->cols = src->cols;
2117 0 : dst->stride = src->cols;
2118 0 : dst->datatype = src->datatype;
2119 0 : dst->x_ptr.p_ptr = ae_malloc((size_t)(dst->rows*((ae_int_t)dst->stride)*ae_sizeof(src->datatype)), state);
2120 0 : if( dst->rows!=0 && dst->stride!=0 && dst->x_ptr.p_ptr==NULL )
2121 0 : ae_break(state, ERR_OUT_OF_MEMORY, "ae_malloc(): out of memory");
2122 0 : dst->last_action = ACT_NEW_LOCATION;
2123 0 : dst->owner = OWN_AE;
2124 : }
2125 : else
2126 : {
2127 0 : if( dst->last_action==ACT_UNCHANGED )
2128 0 : dst->last_action = ACT_SAME_LOCATION;
2129 0 : else if( dst->last_action==ACT_SAME_LOCATION )
2130 0 : dst->last_action = ACT_SAME_LOCATION;
2131 0 : else if( dst->last_action==ACT_NEW_LOCATION )
2132 0 : dst->last_action = ACT_NEW_LOCATION;
2133 : else
2134 0 : ae_assert(ae_false, "ALGLIB: internal error in ae_x_set_vector()", state);
2135 : }
2136 0 : if( src->rows!=0 && src->cols!=0 )
2137 : {
2138 0 : p_src_row = (char*)(src->ptr.pp_void[0]);
2139 0 : p_dst_row = (char*)dst->x_ptr.p_ptr;
2140 0 : row_size = ae_sizeof(src->datatype)*src->cols;
2141 0 : for(i=0; i<src->rows; i++, p_src_row+=src->stride*ae_sizeof(src->datatype), p_dst_row+=dst->stride*ae_sizeof(src->datatype))
2142 0 : memmove(p_dst_row, p_src_row, (size_t)(row_size));
2143 : }
2144 : }
2145 :
2146 : /************************************************************************
2147 : This function attaches x_vector to ae_vector's contents.
2148 : Ownership of memory allocated is not changed (it is still managed by
2149 : ae_matrix).
2150 :
2151 : dst destination vector
2152 : src source, vector in x-format
2153 : state ALGLIB environment state
2154 :
2155 : NOTES:
2156 : * dst is assumed to be initialized. Its contents is freed before
2157 : attaching to src.
2158 : * this function doesn't need ae_state parameter because it can't fail
2159 : (assuming correctly initialized src)
2160 : ************************************************************************/
2161 0 : void ae_x_attach_to_vector(x_vector *dst, ae_vector *src)
2162 : {
2163 0 : if( dst->owner==OWN_AE )
2164 0 : ae_free(dst->x_ptr.p_ptr);
2165 0 : dst->x_ptr.p_ptr = src->ptr.p_ptr;
2166 0 : dst->last_action = ACT_NEW_LOCATION;
2167 0 : dst->cnt = src->cnt;
2168 0 : dst->datatype = src->datatype;
2169 0 : dst->owner = OWN_CALLER;
2170 0 : }
2171 :
2172 : /************************************************************************
2173 : This function attaches x_matrix to ae_matrix's contents.
2174 : Ownership of memory allocated is not changed (it is still managed by
2175 : ae_matrix).
2176 :
2177 : dst destination vector
2178 : src source, matrix in x-format
2179 : state ALGLIB environment state
2180 :
2181 : NOTES:
2182 : * dst is assumed to be initialized. Its contents is freed before
2183 : attaching to src.
2184 : * this function doesn't need ae_state parameter because it can't fail
2185 : (assuming correctly initialized src)
2186 : ************************************************************************/
2187 0 : void ae_x_attach_to_matrix(x_matrix *dst, ae_matrix *src)
2188 : {
2189 0 : if( dst->owner==OWN_AE )
2190 0 : ae_free(dst->x_ptr.p_ptr);
2191 0 : dst->rows = src->rows;
2192 0 : dst->cols = src->cols;
2193 0 : dst->stride = src->stride;
2194 0 : dst->datatype = src->datatype;
2195 0 : dst->x_ptr.p_ptr = &(src->ptr.pp_double[0][0]);
2196 0 : dst->last_action = ACT_NEW_LOCATION;
2197 0 : dst->owner = OWN_CALLER;
2198 0 : }
2199 :
2200 : /************************************************************************
2201 : This function clears x_vector. It does nothing if vector is not owned by
2202 : ALGLIB environment.
2203 :
2204 : dst vector
2205 : ************************************************************************/
2206 0 : void x_vector_clear(x_vector *dst)
2207 : {
2208 0 : if( dst->owner==OWN_AE )
2209 0 : aligned_free(dst->x_ptr.p_ptr);
2210 0 : dst->x_ptr.p_ptr = NULL;
2211 0 : dst->cnt = 0;
2212 0 : }
2213 :
2214 : /************************************************************************
2215 : Assertion
2216 :
2217 : For non-NULL state it allows to gracefully leave ALGLIB session,
2218 : removing all frames and deallocating registered dynamic data structure.
2219 :
2220 : For NULL state it just abort()'s program.
2221 :
2222 : IMPORTANT: this function ALWAYS evaluates its argument. It can not be
2223 : replaced by macro which does nothing. So, you may place actual
2224 : function calls at cond, and these will always be performed.
2225 : ************************************************************************/
2226 0 : void ae_assert(ae_bool cond, const char *msg, ae_state *state)
2227 : {
2228 0 : if( !cond )
2229 0 : ae_break(state, ERR_ASSERTION_FAILED, msg);
2230 0 : }
2231 :
2232 : /************************************************************************
2233 : CPUID
2234 :
2235 : Returns information about features CPU and compiler support.
2236 :
2237 : You must tell ALGLIB what CPU family is used by defining AE_CPU symbol
2238 : (without this hint zero will be returned).
2239 :
2240 : Note: results of this function depend on both CPU and compiler;
2241 : if compiler doesn't support SSE intrinsics, function won't set
2242 : corresponding flag.
2243 : ************************************************************************/
2244 : static volatile ae_bool _ae_cpuid_initialized = ae_false;
2245 : static volatile ae_bool _ae_cpuid_has_sse2 = ae_false;
2246 0 : ae_int_t ae_cpuid()
2247 : {
2248 : /*
2249 : * to speed up CPU detection we cache results from previous attempts
2250 : * there is no synchronization, but it is still thread safe.
2251 : *
2252 : * thread safety is guaranteed on all modern architectures which
2253 : * have following property: simultaneous writes by different cores
2254 : * to the same location will be executed in serial manner.
2255 : *
2256 : */
2257 : ae_int_t result;
2258 :
2259 : /*
2260 : * if not initialized, determine system properties
2261 : */
2262 0 : if( !_ae_cpuid_initialized )
2263 : {
2264 : /*
2265 : * SSE2
2266 : */
2267 : #if defined(AE_CPU)
2268 : #if (AE_CPU==AE_INTEL) && defined(AE_HAS_SSE2_INTRINSICS)
2269 : #if AE_COMPILER==AE_MSVC
2270 : {
2271 : int CPUInfo[4];
2272 : __cpuid(CPUInfo, 1);
2273 : if( (CPUInfo[3]&0x04000000)!=0 )
2274 : _ae_cpuid_has_sse2 = ae_true;
2275 : }
2276 : #elif AE_COMPILER==AE_GNUC
2277 : {
2278 : ae_int_t a,b,c,d;
2279 : __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1));
2280 : if( (d&0x04000000)!=0 )
2281 : _ae_cpuid_has_sse2 = ae_true;
2282 : }
2283 : #elif AE_COMPILER==AE_SUNC
2284 : {
2285 : ae_int_t a,b,c,d;
2286 : __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1));
2287 : if( (d&0x04000000)!=0 )
2288 : _ae_cpuid_has_sse2 = ae_true;
2289 : }
2290 : #else
2291 : #endif
2292 : #endif
2293 : #endif
2294 : /*
2295 : * Perform one more CPUID call to generate memory fence
2296 : */
2297 : #if AE_CPU==AE_INTEL
2298 : #if AE_COMPILER==AE_MSVC
2299 : { int CPUInfo[4]; __cpuid(CPUInfo, 1); }
2300 : #elif AE_COMPILER==AE_GNUC
2301 : { ae_int_t a,b,c,d; __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1)); }
2302 : #elif AE_COMPILER==AE_SUNC
2303 : { ae_int_t a,b,c,d; __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1)); }
2304 : #else
2305 : #endif
2306 : #endif
2307 :
2308 : /*
2309 : * set initialization flag
2310 : */
2311 0 : _ae_cpuid_initialized = ae_true;
2312 : }
2313 :
2314 : /*
2315 : * return
2316 : */
2317 0 : result = 0;
2318 0 : if( _ae_cpuid_has_sse2 )
2319 0 : result = result|CPU_SSE2;
2320 0 : return result;
2321 : }
2322 :
2323 : /************************************************************************
2324 : Activates tracing to file
2325 :
2326 : IMPORTANT: this function is NOT thread-safe! Calling it from multiple
2327 : threads will result in undefined behavior. Calling it when
2328 : some thread calls ALGLIB functions may result in undefined
2329 : behavior.
2330 : ************************************************************************/
2331 0 : void ae_trace_file(const char *tags, const char *filename)
2332 : {
2333 : /*
2334 : * clean up previous call
2335 : */
2336 0 : if( alglib_fclose_trace )
2337 : {
2338 0 : if( alglib_trace_file!=NULL )
2339 0 : fclose(alglib_trace_file);
2340 0 : alglib_trace_file = NULL;
2341 0 : alglib_fclose_trace = ae_false;
2342 : }
2343 :
2344 : /*
2345 : * store ",tags," to buffer. Leading and trailing commas allow us
2346 : * to perform checks for various tags by simply calling strstr().
2347 : */
2348 0 : memset(alglib_trace_tags, 0, ALGLIB_TRACE_BUFFER_LEN);
2349 0 : strcat(alglib_trace_tags, ",");
2350 0 : strncat(alglib_trace_tags, tags, ALGLIB_TRACE_TAGS_LEN);
2351 0 : strcat(alglib_trace_tags, ",");
2352 0 : for(int i=0; alglib_trace_tags[i]!=0; i++)
2353 0 : alglib_trace_tags[i] = tolower(alglib_trace_tags[i]);
2354 :
2355 : /*
2356 : * set up trace
2357 : */
2358 0 : alglib_trace_type = ALGLIB_TRACE_FILE;
2359 0 : alglib_trace_file = fopen(filename, "ab");
2360 0 : alglib_fclose_trace = ae_true;
2361 0 : }
2362 :
2363 : /************************************************************************
2364 : Disables tracing
2365 : ************************************************************************/
2366 0 : void ae_trace_disable()
2367 : {
2368 0 : alglib_trace_type = ALGLIB_TRACE_NONE;
2369 0 : if( alglib_fclose_trace )
2370 0 : fclose(alglib_trace_file);
2371 0 : alglib_trace_file = NULL;
2372 0 : alglib_fclose_trace = ae_false;
2373 0 : }
2374 :
2375 : /************************************************************************
2376 : Checks whether specific kind of tracing is enabled
2377 : ************************************************************************/
2378 0 : ae_bool ae_is_trace_enabled(const char *tag)
2379 : {
2380 : char buf[ALGLIB_TRACE_BUFFER_LEN];
2381 :
2382 : /* check global trace status */
2383 0 : if( alglib_trace_type==ALGLIB_TRACE_NONE || alglib_trace_file==NULL )
2384 0 : return ae_false;
2385 :
2386 : /* copy tag to buffer, lowercase it */
2387 0 : memset(buf, 0, ALGLIB_TRACE_BUFFER_LEN);
2388 0 : strcat(buf, ",");
2389 0 : strncat(buf, tag, ALGLIB_TRACE_TAGS_LEN);
2390 0 : strcat(buf, "?");
2391 0 : for(int i=0; buf[i]!=0; i++)
2392 0 : buf[i] = tolower(buf[i]);
2393 :
2394 : /* contains tag (followed by comma, which means exact match) */
2395 0 : buf[strlen(buf)-1] = ',';
2396 0 : if( strstr(alglib_trace_tags,buf)!=NULL )
2397 0 : return ae_true;
2398 :
2399 : /* contains tag (followed by dot, which means match with child) */
2400 0 : buf[strlen(buf)-1] = '.';
2401 0 : if( strstr(alglib_trace_tags,buf)!=NULL )
2402 0 : return ae_true;
2403 :
2404 : /* nothing */
2405 0 : return ae_false;
2406 : }
2407 :
2408 0 : void ae_trace(const char * printf_fmt, ...)
2409 : {
2410 : /* check global trace status */
2411 0 : if( alglib_trace_type==ALGLIB_TRACE_FILE && alglib_trace_file!=NULL )
2412 : {
2413 : va_list args;
2414 :
2415 : /* fprintf() */
2416 0 : va_start(args, printf_fmt);
2417 0 : vfprintf(alglib_trace_file, printf_fmt, args);
2418 0 : va_end(args);
2419 :
2420 : /* flush output */
2421 0 : fflush(alglib_trace_file);
2422 : }
2423 0 : }
2424 :
2425 0 : int ae_tickcount()
2426 : {
2427 : #if AE_OS==AE_WINDOWS || defined(AE_DEBUG4WINDOWS)
2428 : return (int)GetTickCount();
2429 : #elif AE_OS==AE_POSIX || defined(AE_DEBUG4POSIX)
2430 : struct timeval now;
2431 : ae_int64_t r, v;
2432 : gettimeofday(&now, NULL);
2433 : v = now.tv_sec;
2434 : r = v*1000;
2435 : v = now.tv_usec/1000;
2436 : r = r+v;
2437 : return r;
2438 : /*struct timespec now;
2439 : if (clock_gettime(CLOCK_MONOTONIC, &now) )
2440 : return 0;
2441 : return now.tv_sec * 1000.0 + now.tv_nsec / 1000000.0;*/
2442 : #else
2443 0 : return 0;
2444 : #endif
2445 : }
2446 :
2447 :
2448 : /************************************************************************
2449 : Real math functions
2450 : ************************************************************************/
2451 0 : ae_bool ae_fp_eq(double v1, double v2)
2452 : {
2453 : /* IEEE-strict floating point comparison */
2454 0 : volatile double x = v1;
2455 0 : volatile double y = v2;
2456 0 : return x==y;
2457 : }
2458 :
2459 0 : ae_bool ae_fp_neq(double v1, double v2)
2460 : {
2461 : /* IEEE-strict floating point comparison */
2462 0 : return !ae_fp_eq(v1,v2);
2463 : }
2464 :
2465 0 : ae_bool ae_fp_less(double v1, double v2)
2466 : {
2467 : /* IEEE-strict floating point comparison */
2468 0 : volatile double x = v1;
2469 0 : volatile double y = v2;
2470 0 : return x<y;
2471 : }
2472 :
2473 0 : ae_bool ae_fp_less_eq(double v1, double v2)
2474 : {
2475 : /* IEEE-strict floating point comparison */
2476 0 : volatile double x = v1;
2477 0 : volatile double y = v2;
2478 0 : return x<=y;
2479 : }
2480 :
2481 0 : ae_bool ae_fp_greater(double v1, double v2)
2482 : {
2483 : /* IEEE-strict floating point comparison */
2484 0 : volatile double x = v1;
2485 0 : volatile double y = v2;
2486 0 : return x>y;
2487 : }
2488 :
2489 0 : ae_bool ae_fp_greater_eq(double v1, double v2)
2490 : {
2491 : /* IEEE-strict floating point comparison */
2492 0 : volatile double x = v1;
2493 0 : volatile double y = v2;
2494 0 : return x>=y;
2495 : }
2496 :
2497 0 : ae_bool ae_isfinite_stateless(double x, ae_int_t endianness)
2498 : {
2499 : union _u
2500 : {
2501 : double a;
2502 : ae_int32_t p[2];
2503 : } u;
2504 : ae_int32_t high;
2505 0 : u.a = x;
2506 0 : if( endianness==AE_LITTLE_ENDIAN )
2507 0 : high = u.p[1];
2508 : else
2509 0 : high = u.p[0];
2510 0 : return (high & (ae_int32_t)0x7FF00000)!=(ae_int32_t)0x7FF00000;
2511 : }
2512 :
2513 0 : ae_bool ae_isnan_stateless(double x, ae_int_t endianness)
2514 : {
2515 : union _u
2516 : {
2517 : double a;
2518 : ae_int32_t p[2];
2519 : } u;
2520 : ae_int32_t high, low;
2521 0 : u.a = x;
2522 0 : if( endianness==AE_LITTLE_ENDIAN )
2523 : {
2524 0 : high = u.p[1];
2525 0 : low = u.p[0];
2526 : }
2527 : else
2528 : {
2529 0 : high = u.p[0];
2530 0 : low = u.p[1];
2531 : }
2532 0 : return ((high &0x7FF00000)==0x7FF00000) && (((high &0x000FFFFF)!=0) || (low!=0));
2533 : }
2534 :
2535 0 : ae_bool ae_isinf_stateless(double x, ae_int_t endianness)
2536 : {
2537 : union _u
2538 : {
2539 : double a;
2540 : ae_int32_t p[2];
2541 : } u;
2542 : ae_int32_t high, low;
2543 0 : u.a = x;
2544 0 : if( endianness==AE_LITTLE_ENDIAN )
2545 : {
2546 0 : high = u.p[1];
2547 0 : low = u.p[0];
2548 : }
2549 : else
2550 : {
2551 0 : high = u.p[0];
2552 0 : low = u.p[1];
2553 : }
2554 :
2555 : /* 31 least significant bits of high are compared */
2556 0 : return ((high&0x7FFFFFFF)==0x7FF00000) && (low==0);
2557 : }
2558 :
2559 0 : ae_bool ae_isposinf_stateless(double x, ae_int_t endianness)
2560 : {
2561 : union _u
2562 : {
2563 : double a;
2564 : ae_int32_t p[2];
2565 : } u;
2566 : ae_int32_t high, low;
2567 0 : u.a = x;
2568 0 : if( endianness==AE_LITTLE_ENDIAN )
2569 : {
2570 0 : high = u.p[1];
2571 0 : low = u.p[0];
2572 : }
2573 : else
2574 : {
2575 0 : high = u.p[0];
2576 0 : low = u.p[1];
2577 : }
2578 :
2579 : /* all 32 bits of high are compared */
2580 0 : return (high==(ae_int32_t)0x7FF00000) && (low==0);
2581 : }
2582 :
2583 0 : ae_bool ae_isneginf_stateless(double x, ae_int_t endianness)
2584 : {
2585 : union _u
2586 : {
2587 : double a;
2588 : ae_int32_t p[2];
2589 : } u;
2590 : ae_int32_t high, low;
2591 0 : u.a = x;
2592 0 : if( endianness==AE_LITTLE_ENDIAN )
2593 : {
2594 0 : high = u.p[1];
2595 0 : low = u.p[0];
2596 : }
2597 : else
2598 : {
2599 0 : high = u.p[0];
2600 0 : low = u.p[1];
2601 : }
2602 :
2603 : /* this code is a bit tricky to avoid comparison of high with 0xFFF00000, which may be unsafe with some buggy compilers */
2604 0 : return ((high&0x7FFFFFFF)==0x7FF00000) && (high!=(ae_int32_t)0x7FF00000) && (low==0);
2605 : }
2606 :
2607 32 : ae_int_t ae_get_endianness()
2608 : {
2609 : union
2610 : {
2611 : double a;
2612 : ae_int32_t p[2];
2613 : } u;
2614 :
2615 : /*
2616 : * determine endianness
2617 : * two types are supported: big-endian and little-endian.
2618 : * mixed-endian hardware is NOT supported.
2619 : *
2620 : * 1983 is used as magic number because its non-periodic double
2621 : * representation allow us to easily distinguish between upper
2622 : * and lower halfs and to detect mixed endian hardware.
2623 : *
2624 : */
2625 32 : u.a = 1.0/1983.0;
2626 32 : if( u.p[1]==(ae_int32_t)0x3f408642 )
2627 32 : return AE_LITTLE_ENDIAN;
2628 0 : if( u.p[0]==(ae_int32_t)0x3f408642 )
2629 0 : return AE_BIG_ENDIAN;
2630 0 : return AE_MIXED_ENDIAN;
2631 : }
2632 :
2633 0 : ae_bool ae_isfinite(double x,ae_state *state)
2634 : {
2635 0 : return ae_isfinite_stateless(x, state->endianness);
2636 : }
2637 :
2638 0 : ae_bool ae_isnan(double x, ae_state *state)
2639 : {
2640 0 : return ae_isnan_stateless(x, state->endianness);
2641 : }
2642 :
2643 0 : ae_bool ae_isinf(double x, ae_state *state)
2644 : {
2645 0 : return ae_isinf_stateless(x, state->endianness);
2646 : }
2647 :
2648 0 : ae_bool ae_isposinf(double x,ae_state *state)
2649 : {
2650 0 : return ae_isposinf_stateless(x, state->endianness);
2651 : }
2652 :
2653 0 : ae_bool ae_isneginf(double x,ae_state *state)
2654 : {
2655 0 : return ae_isneginf_stateless(x, state->endianness);
2656 : }
2657 :
2658 0 : double ae_fabs(double x, ae_state *state)
2659 : {
2660 0 : return fabs(x);
2661 : }
2662 :
2663 0 : ae_int_t ae_iabs(ae_int_t x, ae_state *state)
2664 : {
2665 0 : return x>=0 ? x : -x;
2666 : }
2667 :
2668 0 : double ae_sqr(double x, ae_state *state)
2669 : {
2670 0 : return x*x;
2671 : }
2672 :
2673 0 : double ae_sqrt(double x, ae_state *state)
2674 : {
2675 0 : return sqrt(x);
2676 : }
2677 :
2678 0 : ae_int_t ae_sign(double x, ae_state *state)
2679 : {
2680 0 : if( x>0 ) return 1;
2681 0 : if( x<0 ) return -1;
2682 0 : return 0;
2683 : }
2684 :
2685 0 : ae_int_t ae_round(double x, ae_state *state)
2686 : {
2687 0 : return (ae_int_t)(ae_ifloor(x+0.5,state));
2688 : }
2689 :
2690 0 : ae_int_t ae_trunc(double x, ae_state *state)
2691 : {
2692 0 : return (ae_int_t)(x>0 ? ae_ifloor(x,state) : ae_iceil(x,state));
2693 : }
2694 :
2695 0 : ae_int_t ae_ifloor(double x, ae_state *state)
2696 : {
2697 0 : return (ae_int_t)(floor(x));
2698 : }
2699 :
2700 0 : ae_int_t ae_iceil(double x, ae_state *state)
2701 : {
2702 0 : return (ae_int_t)(ceil(x));
2703 : }
2704 :
2705 0 : ae_int_t ae_maxint(ae_int_t m1, ae_int_t m2, ae_state *state)
2706 : {
2707 0 : return m1>m2 ? m1 : m2;
2708 : }
2709 :
2710 0 : ae_int_t ae_minint(ae_int_t m1, ae_int_t m2, ae_state *state)
2711 : {
2712 0 : return m1>m2 ? m2 : m1;
2713 : }
2714 :
2715 0 : double ae_maxreal(double m1, double m2, ae_state *state)
2716 : {
2717 0 : return m1>m2 ? m1 : m2;
2718 : }
2719 :
2720 0 : double ae_minreal(double m1, double m2, ae_state *state)
2721 : {
2722 0 : return m1>m2 ? m2 : m1;
2723 : }
2724 :
2725 0 : double ae_randomreal(ae_state *state)
2726 : {
2727 0 : int i1 = rand();
2728 0 : int i2 = rand();
2729 0 : double mx = (double)(RAND_MAX)+1.0;
2730 0 : volatile double tmp0 = i2/mx;
2731 0 : volatile double tmp1 = i1+tmp0;
2732 0 : return tmp1/mx;
2733 : }
2734 :
2735 0 : ae_int_t ae_randominteger(ae_int_t maxv, ae_state *state)
2736 : {
2737 0 : return rand()%maxv;
2738 : }
2739 :
2740 0 : double ae_sin(double x, ae_state *state)
2741 : {
2742 0 : return sin(x);
2743 : }
2744 :
2745 0 : double ae_cos(double x, ae_state *state)
2746 : {
2747 0 : return cos(x);
2748 : }
2749 :
2750 0 : double ae_tan(double x, ae_state *state)
2751 : {
2752 0 : return tan(x);
2753 : }
2754 :
2755 0 : double ae_sinh(double x, ae_state *state)
2756 : {
2757 0 : return sinh(x);
2758 : }
2759 :
2760 0 : double ae_cosh(double x, ae_state *state)
2761 : {
2762 0 : return cosh(x);
2763 : }
2764 0 : double ae_tanh(double x, ae_state *state)
2765 : {
2766 0 : return tanh(x);
2767 : }
2768 :
2769 0 : double ae_asin(double x, ae_state *state)
2770 : {
2771 0 : return asin(x);
2772 : }
2773 :
2774 0 : double ae_acos(double x, ae_state *state)
2775 : {
2776 0 : return acos(x);
2777 : }
2778 :
2779 0 : double ae_atan(double x, ae_state *state)
2780 : {
2781 0 : return atan(x);
2782 : }
2783 :
2784 0 : double ae_atan2(double y, double x, ae_state *state)
2785 : {
2786 0 : return atan2(y,x);
2787 : }
2788 :
2789 0 : double ae_log(double x, ae_state *state)
2790 : {
2791 0 : return log(x);
2792 : }
2793 :
2794 0 : double ae_pow(double x, double y, ae_state *state)
2795 : {
2796 0 : return pow(x,y);
2797 : }
2798 :
2799 0 : double ae_exp(double x, ae_state *state)
2800 : {
2801 0 : return exp(x);
2802 : }
2803 :
2804 : /************************************************************************
2805 : Symmetric/Hermitian properties: check and force
2806 : ************************************************************************/
2807 0 : static void x_split_length(ae_int_t n, ae_int_t nb, ae_int_t* n1, ae_int_t* n2)
2808 : {
2809 : ae_int_t r;
2810 0 : if( n<=nb )
2811 : {
2812 0 : *n1 = n;
2813 0 : *n2 = 0;
2814 : }
2815 : else
2816 : {
2817 0 : if( n%nb!=0 )
2818 : {
2819 0 : *n2 = n%nb;
2820 0 : *n1 = n-(*n2);
2821 : }
2822 : else
2823 : {
2824 0 : *n2 = n/2;
2825 0 : *n1 = n-(*n2);
2826 0 : if( *n1%nb==0 )
2827 : {
2828 0 : return;
2829 : }
2830 0 : r = nb-*n1%nb;
2831 0 : *n1 = *n1+r;
2832 0 : *n2 = *n2-r;
2833 : }
2834 : }
2835 : }
/*
 * Computes sqrt(x^2+y^2) by factoring out the larger magnitude, so that
 * only the ratio smaller/larger is squared.
 */
static double x_safepythag2(double x, double y)
{
    const double ax = fabs(x);
    const double ay = fabs(y);
    const double larger = ax>ay ? ax : ay;
    const double lesser = ax<ay ? ax : ay;
    double ratio;

    if( lesser==0 )
        return larger;
    ratio = lesser/larger;
    return larger*sqrt(1+ratio*ratio);
}
2855 : /*
2856 : * this function checks difference between offdiagonal blocks BL and BU
2857 : * (see below). Block BL is specified by offsets (offset0,offset1) and
2858 : * sizes (len0,len1).
2859 : *
2860 : * [ . ]
2861 : * [ A0 BU ]
2862 : * A = [ BL A1 ]
2863 : * [ . ]
2864 : *
2865 : * this subroutine updates current values of:
2866 : * a) mx maximum value of A[i,j] found so far
2867 : * b) err componentwise difference between elements of BL and BU^T
2868 : *
2869 : */
2870 0 : static void is_symmetric_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
2871 : {
2872 : /* try to split problem into two smaller ones */
2873 0 : if( len0>x_nb || len1>x_nb )
2874 : {
2875 : ae_int_t n1, n2;
2876 0 : if( len0>len1 )
2877 : {
2878 0 : x_split_length(len0, x_nb, &n1, &n2);
2879 0 : is_symmetric_rec_off_stat(a, offset0, offset1, n1, len1, nonfinite, mx, err, _state);
2880 0 : is_symmetric_rec_off_stat(a, offset0+n1, offset1, n2, len1, nonfinite, mx, err, _state);
2881 : }
2882 : else
2883 : {
2884 0 : x_split_length(len1, x_nb, &n1, &n2);
2885 0 : is_symmetric_rec_off_stat(a, offset0, offset1, len0, n1, nonfinite, mx, err, _state);
2886 0 : is_symmetric_rec_off_stat(a, offset0, offset1+n1, len0, n2, nonfinite, mx, err, _state);
2887 : }
2888 0 : return;
2889 : }
2890 : else
2891 : {
2892 : /* base case */
2893 : double *p1, *p2, *prow, *pcol;
2894 : double v;
2895 : ae_int_t i, j;
2896 :
2897 0 : p1 = (double*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;
2898 0 : p2 = (double*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;
2899 0 : for(i=0; i<len0; i++)
2900 : {
2901 0 : pcol = p2+i;
2902 0 : prow = p1+i*a->stride;
2903 0 : for(j=0; j<len1; j++)
2904 : {
2905 0 : if( !ae_isfinite(*pcol,_state) || !ae_isfinite(*prow,_state) )
2906 : {
2907 0 : *nonfinite = ae_true;
2908 : }
2909 : else
2910 : {
2911 0 : v = fabs(*pcol);
2912 0 : *mx = *mx>v ? *mx : v;
2913 0 : v = fabs(*prow);
2914 0 : *mx = *mx>v ? *mx : v;
2915 0 : v = fabs(*pcol-*prow);
2916 0 : *err = *err>v ? *err : v;
2917 : }
2918 0 : pcol += a->stride;
2919 0 : prow++;
2920 : }
2921 : }
2922 : }
2923 : }
2924 : /*
2925 : * this function checks that diagonal block A0 is symmetric.
2926 : * Block A0 is specified by its offset and size.
2927 : *
2928 : * [ . ]
2929 : * [ A0 ]
2930 : * A = [ . ]
2931 : * [ . ]
2932 : *
2933 : * this subroutine updates current values of:
2934 : * a) mx maximum value of A[i,j] found so far
2935 : * b) err componentwise difference between A0 and A0^T
2936 : *
2937 : */
2938 0 : static void is_symmetric_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
2939 : {
2940 : double *p, *prow, *pcol;
2941 : double v;
2942 : ae_int_t i, j;
2943 :
2944 : /* try to split problem into two smaller ones */
2945 0 : if( len>x_nb )
2946 : {
2947 : ae_int_t n1, n2;
2948 0 : x_split_length(len, x_nb, &n1, &n2);
2949 0 : is_symmetric_rec_diag_stat(a, offset, n1, nonfinite, mx, err, _state);
2950 0 : is_symmetric_rec_diag_stat(a, offset+n1, n2, nonfinite, mx, err, _state);
2951 0 : is_symmetric_rec_off_stat(a, offset+n1, offset, n2, n1, nonfinite, mx, err, _state);
2952 0 : return;
2953 : }
2954 :
2955 : /* base case */
2956 0 : p = (double*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
2957 0 : for(i=0; i<len; i++)
2958 : {
2959 0 : pcol = p+i;
2960 0 : prow = p+i*a->stride;
2961 0 : for(j=0; j<i; j++,pcol+=a->stride,prow++)
2962 : {
2963 0 : if( !ae_isfinite(*pcol,_state) || !ae_isfinite(*prow,_state) )
2964 : {
2965 0 : *nonfinite = ae_true;
2966 : }
2967 : else
2968 : {
2969 0 : v = fabs(*pcol);
2970 0 : *mx = *mx>v ? *mx : v;
2971 0 : v = fabs(*prow);
2972 0 : *mx = *mx>v ? *mx : v;
2973 0 : v = fabs(*pcol-*prow);
2974 0 : *err = *err>v ? *err : v;
2975 : }
2976 : }
2977 0 : v = fabs(p[i+i*a->stride]);
2978 0 : *mx = *mx>v ? *mx : v;
2979 : }
2980 : }
2981 : /*
2982 : * this function checks difference between offdiagonal blocks BL and BU
2983 : * (see below). Block BL is specified by offsets (offset0,offset1) and
2984 : * sizes (len0,len1).
2985 : *
2986 : * [ . ]
2987 : * [ A0 BU ]
2988 : * A = [ BL A1 ]
2989 : * [ . ]
2990 : *
2991 : * this subroutine updates current values of:
2992 : * a) mx maximum value of A[i,j] found so far
2993 : * b) err componentwise difference between elements of BL and BU^H
2994 : *
2995 : */
2996 0 : static void is_hermitian_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
2997 : {
2998 : /* try to split problem into two smaller ones */
2999 0 : if( len0>x_nb || len1>x_nb )
3000 : {
3001 : ae_int_t n1, n2;
3002 0 : if( len0>len1 )
3003 : {
3004 0 : x_split_length(len0, x_nb, &n1, &n2);
3005 0 : is_hermitian_rec_off_stat(a, offset0, offset1, n1, len1, nonfinite, mx, err, _state);
3006 0 : is_hermitian_rec_off_stat(a, offset0+n1, offset1, n2, len1, nonfinite, mx, err, _state);
3007 : }
3008 : else
3009 : {
3010 0 : x_split_length(len1, x_nb, &n1, &n2);
3011 0 : is_hermitian_rec_off_stat(a, offset0, offset1, len0, n1, nonfinite, mx, err, _state);
3012 0 : is_hermitian_rec_off_stat(a, offset0, offset1+n1, len0, n2, nonfinite, mx, err, _state);
3013 : }
3014 0 : return;
3015 : }
3016 : else
3017 : {
3018 : /* base case */
3019 : ae_complex *p1, *p2, *prow, *pcol;
3020 : double v;
3021 : ae_int_t i, j;
3022 :
3023 0 : p1 = (ae_complex*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;
3024 0 : p2 = (ae_complex*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;
3025 0 : for(i=0; i<len0; i++)
3026 : {
3027 0 : pcol = p2+i;
3028 0 : prow = p1+i*a->stride;
3029 0 : for(j=0; j<len1; j++)
3030 : {
3031 0 : if( !ae_isfinite(pcol->x, _state) || !ae_isfinite(pcol->y, _state) || !ae_isfinite(prow->x, _state) || !ae_isfinite(prow->y, _state) )
3032 : {
3033 0 : *nonfinite = ae_true;
3034 : }
3035 : else
3036 : {
3037 0 : v = x_safepythag2(pcol->x, pcol->y);
3038 0 : *mx = *mx>v ? *mx : v;
3039 0 : v = x_safepythag2(prow->x, prow->y);
3040 0 : *mx = *mx>v ? *mx : v;
3041 0 : v = x_safepythag2(pcol->x-prow->x, pcol->y+prow->y);
3042 0 : *err = *err>v ? *err : v;
3043 : }
3044 0 : pcol += a->stride;
3045 0 : prow++;
3046 : }
3047 : }
3048 : }
3049 : }
3050 : /*
3051 : * this function checks that diagonal block A0 is Hermitian.
3052 : * Block A0 is specified by its offset and size.
3053 : *
3054 : * [ . ]
3055 : * [ A0 ]
3056 : * A = [ . ]
3057 : * [ . ]
3058 : *
3059 : * this subroutine updates current values of:
3060 : * a) mx maximum value of A[i,j] found so far
3061 : * b) err componentwise difference between A0 and A0^H
3062 : *
3063 : */
3064 0 : static void is_hermitian_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
3065 : {
3066 : ae_complex *p, *prow, *pcol;
3067 : double v;
3068 : ae_int_t i, j;
3069 :
3070 : /* try to split problem into two smaller ones */
3071 0 : if( len>x_nb )
3072 : {
3073 : ae_int_t n1, n2;
3074 0 : x_split_length(len, x_nb, &n1, &n2);
3075 0 : is_hermitian_rec_diag_stat(a, offset, n1, nonfinite, mx, err, _state);
3076 0 : is_hermitian_rec_diag_stat(a, offset+n1, n2, nonfinite, mx, err, _state);
3077 0 : is_hermitian_rec_off_stat(a, offset+n1, offset, n2, n1, nonfinite, mx, err, _state);
3078 0 : return;
3079 : }
3080 :
3081 : /* base case */
3082 0 : p = (ae_complex*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
3083 0 : for(i=0; i<len; i++)
3084 : {
3085 0 : pcol = p+i;
3086 0 : prow = p+i*a->stride;
3087 0 : for(j=0; j<i; j++,pcol+=a->stride,prow++)
3088 : {
3089 0 : if( !ae_isfinite(pcol->x, _state) || !ae_isfinite(pcol->y, _state) || !ae_isfinite(prow->x, _state) || !ae_isfinite(prow->y, _state) )
3090 : {
3091 0 : *nonfinite = ae_true;
3092 : }
3093 : else
3094 : {
3095 0 : v = x_safepythag2(pcol->x, pcol->y);
3096 0 : *mx = *mx>v ? *mx : v;
3097 0 : v = x_safepythag2(prow->x, prow->y);
3098 0 : *mx = *mx>v ? *mx : v;
3099 0 : v = x_safepythag2(pcol->x-prow->x, pcol->y+prow->y);
3100 0 : *err = *err>v ? *err : v;
3101 : }
3102 : }
3103 0 : if( !ae_isfinite(p[i+i*a->stride].x, _state) || !ae_isfinite(p[i+i*a->stride].y, _state) )
3104 : {
3105 0 : *nonfinite = ae_true;
3106 : }
3107 : else
3108 : {
3109 0 : v = fabs(p[i+i*a->stride].x);
3110 0 : *mx = *mx>v ? *mx : v;
3111 0 : v = fabs(p[i+i*a->stride].y);
3112 0 : *err = *err>v ? *err : v;
3113 : }
3114 : }
3115 : }
3116 : /*
3117 : * this function copies offdiagonal block BL to its symmetric counterpart
3118 : * BU (see below). Block BL is specified by offsets (offset0,offset1)
3119 : * and sizes (len0,len1).
3120 : *
3121 : * [ . ]
3122 : * [ A0 BU ]
3123 : * A = [ BL A1 ]
3124 : * [ . ]
3125 : *
3126 : */
3127 0 : static void force_symmetric_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1)
3128 : {
3129 : /* try to split problem into two smaller ones */
3130 0 : if( len0>x_nb || len1>x_nb )
3131 : {
3132 : ae_int_t n1, n2;
3133 0 : if( len0>len1 )
3134 : {
3135 0 : x_split_length(len0, x_nb, &n1, &n2);
3136 0 : force_symmetric_rec_off_stat(a, offset0, offset1, n1, len1);
3137 0 : force_symmetric_rec_off_stat(a, offset0+n1, offset1, n2, len1);
3138 : }
3139 : else
3140 : {
3141 0 : x_split_length(len1, x_nb, &n1, &n2);
3142 0 : force_symmetric_rec_off_stat(a, offset0, offset1, len0, n1);
3143 0 : force_symmetric_rec_off_stat(a, offset0, offset1+n1, len0, n2);
3144 : }
3145 0 : return;
3146 : }
3147 : else
3148 : {
3149 : /* base case */
3150 : double *p1, *p2, *prow, *pcol;
3151 : ae_int_t i, j;
3152 :
3153 0 : p1 = (double*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;
3154 0 : p2 = (double*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;
3155 0 : for(i=0; i<len0; i++)
3156 : {
3157 0 : pcol = p2+i;
3158 0 : prow = p1+i*a->stride;
3159 0 : for(j=0; j<len1; j++)
3160 : {
3161 0 : *pcol = *prow;
3162 0 : pcol += a->stride;
3163 0 : prow++;
3164 : }
3165 : }
3166 : }
3167 : }
3168 : /*
3169 : * this function copies lower part of diagonal block A0 to its upper part
3170 : * Block is specified by offset and size.
3171 : *
3172 : * [ . ]
3173 : * [ A0 ]
3174 : * A = [ . ]
3175 : * [ . ]
3176 : *
3177 : */
3178 0 : static void force_symmetric_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len)
3179 : {
3180 : double *p, *prow, *pcol;
3181 : ae_int_t i, j;
3182 :
3183 : /* try to split problem into two smaller ones */
3184 0 : if( len>x_nb )
3185 : {
3186 : ae_int_t n1, n2;
3187 0 : x_split_length(len, x_nb, &n1, &n2);
3188 0 : force_symmetric_rec_diag_stat(a, offset, n1);
3189 0 : force_symmetric_rec_diag_stat(a, offset+n1, n2);
3190 0 : force_symmetric_rec_off_stat(a, offset+n1, offset, n2, n1);
3191 0 : return;
3192 : }
3193 :
3194 : /* base case */
3195 0 : p = (double*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
3196 0 : for(i=0; i<len; i++)
3197 : {
3198 0 : pcol = p+i;
3199 0 : prow = p+i*a->stride;
3200 0 : for(j=0; j<i; j++,pcol+=a->stride,prow++)
3201 0 : *pcol = *prow;
3202 : }
3203 : }
3204 : /*
3205 : * this function copies Hermitian transpose of offdiagonal block BL to
3206 : * its symmetric counterpart BU (see below). Block BL is specified by
3207 : * offsets (offset0,offset1) and sizes (len0,len1).
3208 : *
3209 : * [ . ]
3210 : * [ A0 BU ]
3211 : * A = [ BL A1 ]
3212 : * [ . ]
3213 : */
3214 0 : static void force_hermitian_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1)
3215 : {
3216 : /* try to split problem into two smaller ones */
3217 0 : if( len0>x_nb || len1>x_nb )
3218 : {
3219 : ae_int_t n1, n2;
3220 0 : if( len0>len1 )
3221 : {
3222 0 : x_split_length(len0, x_nb, &n1, &n2);
3223 0 : force_hermitian_rec_off_stat(a, offset0, offset1, n1, len1);
3224 0 : force_hermitian_rec_off_stat(a, offset0+n1, offset1, n2, len1);
3225 : }
3226 : else
3227 : {
3228 0 : x_split_length(len1, x_nb, &n1, &n2);
3229 0 : force_hermitian_rec_off_stat(a, offset0, offset1, len0, n1);
3230 0 : force_hermitian_rec_off_stat(a, offset0, offset1+n1, len0, n2);
3231 : }
3232 0 : return;
3233 : }
3234 : else
3235 : {
3236 : /* base case */
3237 : ae_complex *p1, *p2, *prow, *pcol;
3238 : ae_int_t i, j;
3239 :
3240 0 : p1 = (ae_complex*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;
3241 0 : p2 = (ae_complex*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;
3242 0 : for(i=0; i<len0; i++)
3243 : {
3244 0 : pcol = p2+i;
3245 0 : prow = p1+i*a->stride;
3246 0 : for(j=0; j<len1; j++)
3247 : {
3248 0 : *pcol = *prow;
3249 0 : pcol += a->stride;
3250 0 : prow++;
3251 : }
3252 : }
3253 : }
3254 : }
3255 : /*
3256 : * this function copies Hermitian transpose of lower part of
3257 : * diagonal block A0 to its upper part. Block is specified by offset and size.
3258 : *
3259 : * [ . ]
3260 : * [ A0 ]
3261 : * A = [ . ]
3262 : * [ . ]
3263 : *
3264 : */
3265 0 : static void force_hermitian_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len)
3266 : {
3267 : ae_complex *p, *prow, *pcol;
3268 : ae_int_t i, j;
3269 :
3270 : /* try to split problem into two smaller ones */
3271 0 : if( len>x_nb )
3272 : {
3273 : ae_int_t n1, n2;
3274 0 : x_split_length(len, x_nb, &n1, &n2);
3275 0 : force_hermitian_rec_diag_stat(a, offset, n1);
3276 0 : force_hermitian_rec_diag_stat(a, offset+n1, n2);
3277 0 : force_hermitian_rec_off_stat(a, offset+n1, offset, n2, n1);
3278 0 : return;
3279 : }
3280 :
3281 : /* base case */
3282 0 : p = (ae_complex*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
3283 0 : for(i=0; i<len; i++)
3284 : {
3285 0 : pcol = p+i;
3286 0 : prow = p+i*a->stride;
3287 0 : for(j=0; j<i; j++,pcol+=a->stride,prow++)
3288 0 : *pcol = *prow;
3289 : }
3290 : }
3291 0 : ae_bool x_is_symmetric(x_matrix *a)
3292 : {
3293 : double mx, err;
3294 : ae_bool nonfinite;
3295 : ae_state _alglib_env_state;
3296 0 : if( a->datatype!=DT_REAL )
3297 0 : return ae_false;
3298 0 : if( a->cols!=a->rows )
3299 0 : return ae_false;
3300 0 : if( a->cols==0 || a->rows==0 )
3301 0 : return ae_true;
3302 0 : ae_state_init(&_alglib_env_state);
3303 0 : mx = 0;
3304 0 : err = 0;
3305 0 : nonfinite = ae_false;
3306 0 : is_symmetric_rec_diag_stat(a, 0, (ae_int_t)a->rows, &nonfinite, &mx, &err, &_alglib_env_state);
3307 0 : if( nonfinite )
3308 0 : return ae_false;
3309 0 : if( mx==0 )
3310 0 : return ae_true;
3311 0 : return err/mx<=1.0E-14;
3312 : }
3313 0 : ae_bool x_is_hermitian(x_matrix *a)
3314 : {
3315 : double mx, err;
3316 : ae_bool nonfinite;
3317 : ae_state _alglib_env_state;
3318 0 : if( a->datatype!=DT_COMPLEX )
3319 0 : return ae_false;
3320 0 : if( a->cols!=a->rows )
3321 0 : return ae_false;
3322 0 : if( a->cols==0 || a->rows==0 )
3323 0 : return ae_true;
3324 0 : ae_state_init(&_alglib_env_state);
3325 0 : mx = 0;
3326 0 : err = 0;
3327 0 : nonfinite = ae_false;
3328 0 : is_hermitian_rec_diag_stat(a, 0, (ae_int_t)a->rows, &nonfinite, &mx, &err, &_alglib_env_state);
3329 0 : if( nonfinite )
3330 0 : return ae_false;
3331 0 : if( mx==0 )
3332 0 : return ae_true;
3333 0 : return err/mx<=1.0E-14;
3334 : }
3335 0 : ae_bool x_force_symmetric(x_matrix *a)
3336 : {
3337 0 : if( a->datatype!=DT_REAL )
3338 0 : return ae_false;
3339 0 : if( a->cols!=a->rows )
3340 0 : return ae_false;
3341 0 : if( a->cols==0 || a->rows==0 )
3342 0 : return ae_true;
3343 0 : force_symmetric_rec_diag_stat(a, 0, (ae_int_t)a->rows);
3344 0 : return ae_true;
3345 : }
3346 0 : ae_bool x_force_hermitian(x_matrix *a)
3347 : {
3348 0 : if( a->datatype!=DT_COMPLEX )
3349 0 : return ae_false;
3350 0 : if( a->cols!=a->rows )
3351 0 : return ae_false;
3352 0 : if( a->cols==0 || a->rows==0 )
3353 0 : return ae_true;
3354 0 : force_hermitian_rec_diag_stat(a, 0, (ae_int_t)a->rows);
3355 0 : return ae_true;
3356 : }
3357 :
3358 0 : ae_bool ae_is_symmetric(ae_matrix *a)
3359 : {
3360 : x_matrix x;
3361 0 : x.owner = OWN_CALLER;
3362 0 : ae_x_attach_to_matrix(&x, a);
3363 0 : return x_is_symmetric(&x);
3364 : }
3365 :
3366 0 : ae_bool ae_is_hermitian(ae_matrix *a)
3367 : {
3368 : x_matrix x;
3369 0 : x.owner = OWN_CALLER;
3370 0 : ae_x_attach_to_matrix(&x, a);
3371 0 : return x_is_hermitian(&x);
3372 : }
3373 :
3374 0 : ae_bool ae_force_symmetric(ae_matrix *a)
3375 : {
3376 : x_matrix x;
3377 0 : x.owner = OWN_CALLER;
3378 0 : ae_x_attach_to_matrix(&x, a);
3379 0 : return x_force_symmetric(&x);
3380 : }
3381 :
3382 0 : ae_bool ae_force_hermitian(ae_matrix *a)
3383 : {
3384 : x_matrix x;
3385 0 : x.owner = OWN_CALLER;
3386 0 : ae_x_attach_to_matrix(&x, a);
3387 0 : return x_force_hermitian(&x);
3388 : }
3389 :
3390 : /************************************************************************
3391 : This function converts six-bit value (from 0 to 63) to character (only
3392 : digits, lowercase and uppercase letters, minus and underscore are used).
3393 :
3394 : If v is negative or greater than 63, this function returns '?'.
3395 : ************************************************************************/
3396 : static char _sixbits2char_tbl[64] = {
3397 : '0', '1', '2', '3', '4', '5', '6', '7',
3398 : '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
3399 : 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
3400 : 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
3401 : 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd',
3402 : 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
3403 : 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
3404 : 'u', 'v', 'w', 'x', 'y', 'z', '-', '_' };
3405 :
3406 0 : char ae_sixbits2char(ae_int_t v)
3407 : {
3408 :
3409 0 : if( v<0 || v>63 )
3410 0 : return '?';
3411 0 : return _sixbits2char_tbl[v];
3412 :
3413 : /* v is correct, process it */
3414 : /*if( v<10 )
3415 : return '0'+v;
3416 : v -= 10;
3417 : if( v<26 )
3418 : return 'A'+v;
3419 : v -= 26;
3420 : if( v<26 )
3421 : return 'a'+v;
3422 : v -= 26;
3423 : return v==0 ? '-' : '_';*/
3424 : }
3425 :
3426 : /************************************************************************
3427 : This function converts character to six-bit value (from 0 to 63).
3428 :
3429 : This function is inverse of ae_sixbits2char()
3430 : If c is not correct character, this function returns -1.
3431 : ************************************************************************/
3432 : static ae_int_t _ae_char2sixbits_tbl[] = {
3433 : -1, -1, -1, -1, -1, -1, -1, -1,
3434 : -1, -1, -1, -1, -1, -1, -1, -1,
3435 : -1, -1, -1, -1, -1, -1, -1, -1,
3436 : -1, -1, -1, -1, -1, -1, -1, -1,
3437 : -1, -1, -1, -1, -1, -1, -1, -1,
3438 : -1, -1, -1, -1, -1, 62, -1, -1,
3439 : 0, 1, 2, 3, 4, 5, 6, 7,
3440 : 8, 9, -1, -1, -1, -1, -1, -1,
3441 : -1, 10, 11, 12, 13, 14, 15, 16,
3442 : 17, 18, 19, 20, 21, 22, 23, 24,
3443 : 25, 26, 27, 28, 29, 30, 31, 32,
3444 : 33, 34, 35, -1, -1, -1, -1, 63,
3445 : -1, 36, 37, 38, 39, 40, 41, 42,
3446 : 43, 44, 45, 46, 47, 48, 49, 50,
3447 : 51, 52, 53, 54, 55, 56, 57, 58,
3448 : 59, 60, 61, -1, -1, -1, -1, -1 };
3449 0 : ae_int_t ae_char2sixbits(char c)
3450 : {
3451 0 : return (c>=0 && c<127) ? _ae_char2sixbits_tbl[(int)c] : -1;
3452 : }
3453 :
3454 : /************************************************************************
3455 : This function converts three bytes (24 bits) to four six-bit values
3456 : (24 bits again).
3457 :
3458 : src pointer to three bytes
3459 : dst pointer to four ints
3460 : ************************************************************************/
3461 0 : void ae_threebytes2foursixbits(const unsigned char *src, ae_int_t *dst)
3462 : {
3463 0 : dst[0] = src[0] & 0x3F;
3464 0 : dst[1] = (src[0]>>6) | ((src[1]&0x0F)<<2);
3465 0 : dst[2] = (src[1]>>4) | ((src[2]&0x03)<<4);
3466 0 : dst[3] = src[2]>>2;
3467 0 : }
3468 :
3469 : /************************************************************************
3470 : This function converts four six-bit values (24 bits) to three bytes
3471 : (24 bits again).
3472 :
3473 : src pointer to four ints
3474 : dst pointer to three bytes
3475 : ************************************************************************/
3476 0 : void ae_foursixbits2threebytes(const ae_int_t *src, unsigned char *dst)
3477 : {
3478 0 : dst[0] = (unsigned char)( src[0] | ((src[1]&0x03)<<6));
3479 0 : dst[1] = (unsigned char)((src[1]>>2) | ((src[2]&0x0F)<<4));
3480 0 : dst[2] = (unsigned char)((src[2]>>4) | (src[3]<<2));
3481 0 : }
3482 :
3483 : /************************************************************************
3484 : This function serializes boolean value into buffer
3485 :
3486 : v boolean value to be serialized
3487 : buf buffer, at least 12 characters wide
3488 : (11 chars for value, one for trailing zero)
3489 : state ALGLIB environment state
3490 : ************************************************************************/
3491 0 : void ae_bool2str(ae_bool v, char *buf, ae_state *state)
3492 : {
3493 0 : char c = v ? '1' : '0';
3494 : ae_int_t i;
3495 0 : for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
3496 0 : buf[i] = c;
3497 0 : buf[AE_SER_ENTRY_LENGTH] = 0;
3498 0 : }
3499 :
3500 : /************************************************************************
3501 : This function unserializes boolean value from buffer
3502 :
3503 : buf buffer which contains value; leading spaces/tabs/newlines are
3504 : ignored, trailing spaces/tabs/newlines are treated as end of
3505 : the boolean value.
3506 : state ALGLIB environment state
3507 :
3508 : This function raises an error in case unexpected symbol is found
3509 : ************************************************************************/
3510 0 : ae_bool ae_str2bool(const char *buf, ae_state *state, const char **pasttheend)
3511 : {
3512 : ae_bool was0, was1;
3513 0 : const char *emsg = "ALGLIB: unable to read boolean value from stream";
3514 :
3515 0 : was0 = ae_false;
3516 0 : was1 = ae_false;
3517 0 : while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
3518 0 : buf++;
3519 0 : while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
3520 : {
3521 0 : if( *buf=='0' )
3522 : {
3523 0 : was0 = ae_true;
3524 0 : buf++;
3525 0 : continue;
3526 : }
3527 0 : if( *buf=='1' )
3528 : {
3529 0 : was1 = ae_true;
3530 0 : buf++;
3531 0 : continue;
3532 : }
3533 0 : ae_break(state, ERR_ASSERTION_FAILED, emsg);
3534 : }
3535 0 : *pasttheend = buf;
3536 0 : if( (!was0) && (!was1) )
3537 0 : ae_break(state, ERR_ASSERTION_FAILED, emsg);
3538 0 : if( was0 && was1 )
3539 0 : ae_break(state, ERR_ASSERTION_FAILED, emsg);
3540 0 : return was1 ? ae_true : ae_false;
3541 : }
3542 :
3543 : /************************************************************************
3544 : This function serializes integer value into buffer
3545 :
3546 : v integer value to be serialized
3547 : buf buffer, at least 12 characters wide
3548 : (11 chars for value, one for trailing zero)
3549 : state ALGLIB environment state
3550 : ************************************************************************/
3551 0 : void ae_int2str(ae_int_t v, char *buf, ae_state *state)
3552 : {
3553 : union _u
3554 : {
3555 : ae_int_t ival;
3556 : unsigned char bytes[9];
3557 : } u;
3558 : ae_int_t i;
3559 : ae_int_t sixbits[12];
3560 : unsigned char c;
3561 :
3562 : /*
3563 : * copy v to array of chars, sign extending it and
3564 : * converting to little endian order
3565 : *
3566 : * because we don't want to mention size of ae_int_t explicitly,
3567 : * we do it as follows:
3568 : * 1. we fill u.bytes by zeros or ones (depending on sign of v)
3569 : * 2. we copy v to u.ival
3570 : * 3. if we run on big endian architecture, we reorder u.bytes
3571 : * 4. now we have signed 64-bit representation of v stored in u.bytes
3572 : * 5. additionally, we set 9th byte of u.bytes to zero in order to
3573 : * simplify conversion to six-bit representation
3574 : */
3575 0 : c = v<0 ? (unsigned char)0xFF : (unsigned char)0x00;
3576 0 : u.ival = v;
3577 0 : for(i=sizeof(ae_int_t); i<=8; i++) /* <=8 is preferred because it avoids unnecessary compiler warnings*/
3578 0 : u.bytes[i] = c;
3579 0 : u.bytes[8] = 0;
3580 0 : if( state->endianness==AE_BIG_ENDIAN )
3581 : {
3582 0 : for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
3583 : {
3584 : unsigned char tc;
3585 0 : tc = u.bytes[i];
3586 0 : u.bytes[i] = u.bytes[sizeof(ae_int_t)-1-i];
3587 0 : u.bytes[sizeof(ae_int_t)-1-i] = tc;
3588 : }
3589 : }
3590 :
3591 : /*
3592 : * convert to six-bit representation, output
3593 : *
3594 : * NOTE: last 12th element of sixbits is always zero, we do not output it
3595 : */
3596 0 : ae_threebytes2foursixbits(u.bytes+0, sixbits+0);
3597 0 : ae_threebytes2foursixbits(u.bytes+3, sixbits+4);
3598 0 : ae_threebytes2foursixbits(u.bytes+6, sixbits+8);
3599 0 : for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
3600 0 : buf[i] = ae_sixbits2char(sixbits[i]);
3601 0 : buf[AE_SER_ENTRY_LENGTH] = 0x00;
3602 0 : }
3603 :
3604 : /************************************************************************
3605 : This function serializes 64-bit integer value into buffer
3606 :
3607 : v integer value to be serialized
3608 : buf buffer, at least 12 characters wide
3609 : (11 chars for value, one for trailing zero)
3610 : state ALGLIB environment state
3611 : ************************************************************************/
3612 0 : void ae_int642str(ae_int64_t v, char *buf, ae_state *state)
3613 : {
3614 : unsigned char bytes[9];
3615 : ae_int_t i;
3616 : ae_int_t sixbits[12];
3617 :
3618 : /*
3619 : * copy v to array of chars, sign extending it and
3620 : * converting to little endian order
3621 : *
3622 : * because we don't want to mention size of ae_int_t explicitly,
3623 : * we do it as follows:
3624 : * 1. we fill bytes by zeros or ones (depending on sign of v)
3625 : * 2. we memmove v to bytes
3626 : * 3. if we run on big endian architecture, we reorder bytes
3627 : * 4. now we have signed 64-bit representation of v stored in bytes
3628 : * 5. additionally, we set 9th byte of bytes to zero in order to
3629 : * simplify conversion to six-bit representation
3630 : */
3631 0 : memset(bytes, v<0 ? 0xFF : 0x00, 8);
3632 0 : memmove(bytes, &v, 8);
3633 0 : bytes[8] = 0;
3634 0 : if( state->endianness==AE_BIG_ENDIAN )
3635 : {
3636 0 : for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
3637 : {
3638 : unsigned char tc;
3639 0 : tc = bytes[i];
3640 0 : bytes[i] = bytes[sizeof(ae_int_t)-1-i];
3641 0 : bytes[sizeof(ae_int_t)-1-i] = tc;
3642 : }
3643 : }
3644 :
3645 : /*
3646 : * convert to six-bit representation, output
3647 : *
3648 : * NOTE: last 12th element of sixbits is always zero, we do not output it
3649 : */
3650 0 : ae_threebytes2foursixbits(bytes+0, sixbits+0);
3651 0 : ae_threebytes2foursixbits(bytes+3, sixbits+4);
3652 0 : ae_threebytes2foursixbits(bytes+6, sixbits+8);
3653 0 : for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
3654 0 : buf[i] = ae_sixbits2char(sixbits[i]);
3655 0 : buf[AE_SER_ENTRY_LENGTH] = 0x00;
3656 0 : }
3657 :
3658 : /************************************************************************
3659 : This function unserializes integer value from string
3660 :
3661 : buf buffer which contains value; leading spaces/tabs/newlines are
3662 : ignored, trailing spaces/tabs/newlines are treated as end of
3663 : the integer value.
3664 : state ALGLIB environment state
3665 :
3666 : This function raises an error in case unexpected symbol is found
3667 : ************************************************************************/
3668 0 : ae_int_t ae_str2int(const char *buf, ae_state *state, const char **pasttheend)
3669 : {
3670 0 : const char *emsg = "ALGLIB: unable to read integer value from stream";
3671 : ae_int_t sixbits[12];
3672 : ae_int_t sixbitsread, i;
3673 : union _u
3674 : {
3675 : ae_int_t ival;
3676 : unsigned char bytes[9];
3677 : } u;
3678 : /*
3679 : * 1. skip leading spaces
3680 : * 2. read and decode six-bit digits
3681 : * 3. set trailing digits to zeros
3682 : * 4. convert to little endian 64-bit integer representation
3683 : * 5. convert to big endian representation, if needed
3684 : */
3685 0 : while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
3686 0 : buf++;
3687 0 : sixbitsread = 0;
3688 0 : while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
3689 : {
3690 : ae_int_t d;
3691 0 : d = ae_char2sixbits(*buf);
3692 0 : if( d<0 || sixbitsread>=AE_SER_ENTRY_LENGTH )
3693 0 : ae_break(state, ERR_ASSERTION_FAILED, emsg);
3694 0 : sixbits[sixbitsread] = d;
3695 0 : sixbitsread++;
3696 0 : buf++;
3697 : }
3698 0 : *pasttheend = buf;
3699 0 : if( sixbitsread==0 )
3700 0 : ae_break(state, ERR_ASSERTION_FAILED, emsg);
3701 0 : for(i=sixbitsread; i<12; i++)
3702 0 : sixbits[i] = 0;
3703 0 : ae_foursixbits2threebytes(sixbits+0, u.bytes+0);
3704 0 : ae_foursixbits2threebytes(sixbits+4, u.bytes+3);
3705 0 : ae_foursixbits2threebytes(sixbits+8, u.bytes+6);
3706 0 : if( state->endianness==AE_BIG_ENDIAN )
3707 : {
3708 0 : for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
3709 : {
3710 : unsigned char tc;
3711 0 : tc = u.bytes[i];
3712 0 : u.bytes[i] = u.bytes[sizeof(ae_int_t)-1-i];
3713 0 : u.bytes[sizeof(ae_int_t)-1-i] = tc;
3714 : }
3715 : }
3716 0 : return u.ival;
3717 : }
3718 :
3719 : /************************************************************************
3720 : This function unserializes 64-bit integer value from string
3721 :
3722 : buf buffer which contains value; leading spaces/tabs/newlines are
3723 : ignored, traling spaces/tabs/newlines are treated as end of
3724 : the boolean value.
3725 : state ALGLIB environment state
3726 :
3727 : This function raises an error in case unexpected symbol is found
3728 : ************************************************************************/
3729 0 : ae_int64_t ae_str2int64(const char *buf, ae_state *state, const char **pasttheend)
3730 : {
3731 0 : const char *emsg = "ALGLIB: unable to read integer value from stream";
3732 : ae_int_t sixbits[12];
3733 : ae_int_t sixbitsread, i;
3734 : unsigned char bytes[9];
3735 : ae_int64_t result;
3736 :
3737 : /*
3738 : * 1. skip leading spaces
3739 : * 2. read and decode six-bit digits
3740 : * 3. set trailing digits to zeros
3741 : * 4. convert to little endian 64-bit integer representation
3742 : * 5. convert to big endian representation, if needed
3743 : */
3744 0 : while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
3745 0 : buf++;
3746 0 : sixbitsread = 0;
3747 0 : while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
3748 : {
3749 : ae_int_t d;
3750 0 : d = ae_char2sixbits(*buf);
3751 0 : if( d<0 || sixbitsread>=AE_SER_ENTRY_LENGTH )
3752 0 : ae_break(state, ERR_ASSERTION_FAILED, emsg);
3753 0 : sixbits[sixbitsread] = d;
3754 0 : sixbitsread++;
3755 0 : buf++;
3756 : }
3757 0 : *pasttheend = buf;
3758 0 : if( sixbitsread==0 )
3759 0 : ae_break(state, ERR_ASSERTION_FAILED, emsg);
3760 0 : for(i=sixbitsread; i<12; i++)
3761 0 : sixbits[i] = 0;
3762 0 : ae_foursixbits2threebytes(sixbits+0, bytes+0);
3763 0 : ae_foursixbits2threebytes(sixbits+4, bytes+3);
3764 0 : ae_foursixbits2threebytes(sixbits+8, bytes+6);
3765 0 : if( state->endianness==AE_BIG_ENDIAN )
3766 : {
3767 0 : for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
3768 : {
3769 : unsigned char tc;
3770 0 : tc = bytes[i];
3771 0 : bytes[i] = bytes[sizeof(ae_int_t)-1-i];
3772 0 : bytes[sizeof(ae_int_t)-1-i] = tc;
3773 : }
3774 : }
3775 0 : memmove(&result, bytes, sizeof(result));
3776 0 : return result;
3777 : }
3778 :
3779 :
3780 : /************************************************************************
3781 : This function serializes double value into buffer
3782 :
3783 : v double value to be serialized
3784 : buf buffer, at least 12 characters wide
3785 : (11 chars for value, one for trailing zero)
3786 : state ALGLIB environment state
3787 : ************************************************************************/
3788 0 : void ae_double2str(double v, char *buf, ae_state *state)
3789 : {
3790 : union _u
3791 : {
3792 : double dval;
3793 : unsigned char bytes[9];
3794 : } u;
3795 : ae_int_t i;
3796 : ae_int_t sixbits[12];
3797 :
3798 : /*
3799 : * handle special quantities
3800 : */
3801 0 : if( ae_isnan(v, state) )
3802 : {
3803 0 : const char *s = ".nan_______";
3804 0 : memmove(buf, s, strlen(s)+1);
3805 0 : return;
3806 : }
3807 0 : if( ae_isposinf(v, state) )
3808 : {
3809 0 : const char *s = ".posinf____";
3810 0 : memmove(buf, s, strlen(s)+1);
3811 0 : return;
3812 : }
3813 0 : if( ae_isneginf(v, state) )
3814 : {
3815 0 : const char *s = ".neginf____";
3816 0 : memmove(buf, s, strlen(s)+1);
3817 0 : return;
3818 : }
3819 :
3820 : /*
3821 : * process general case:
3822 : * 1. copy v to array of chars
3823 : * 2. set 9th byte of u.bytes to zero in order to
3824 : * simplify conversion to six-bit representation
3825 : * 3. convert to little endian (if needed)
3826 : * 4. convert to six-bit representation
3827 : * (last 12th element of sixbits is always zero, we do not output it)
3828 : */
3829 0 : u.dval = v;
3830 0 : u.bytes[8] = 0;
3831 0 : if( state->endianness==AE_BIG_ENDIAN )
3832 : {
3833 0 : for(i=0; i<(ae_int_t)(sizeof(double)/2); i++)
3834 : {
3835 : unsigned char tc;
3836 0 : tc = u.bytes[i];
3837 0 : u.bytes[i] = u.bytes[sizeof(double)-1-i];
3838 0 : u.bytes[sizeof(double)-1-i] = tc;
3839 : }
3840 : }
3841 0 : ae_threebytes2foursixbits(u.bytes+0, sixbits+0);
3842 0 : ae_threebytes2foursixbits(u.bytes+3, sixbits+4);
3843 0 : ae_threebytes2foursixbits(u.bytes+6, sixbits+8);
3844 0 : for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
3845 0 : buf[i] = ae_sixbits2char(sixbits[i]);
3846 0 : buf[AE_SER_ENTRY_LENGTH] = 0x00;
3847 : }
3848 :
3849 : /************************************************************************
3850 : This function unserializes double value from string
3851 :
3852 : buf buffer which contains value; leading spaces/tabs/newlines are
3853 : ignored, traling spaces/tabs/newlines are treated as end of
3854 : the boolean value.
3855 : state ALGLIB environment state
3856 :
3857 : This function raises an error in case unexpected symbol is found
3858 : ************************************************************************/
3859 0 : double ae_str2double(const char *buf, ae_state *state, const char **pasttheend)
3860 : {
3861 0 : const char *emsg = "ALGLIB: unable to read double value from stream";
3862 : ae_int_t sixbits[12];
3863 : ae_int_t sixbitsread, i;
3864 : union _u
3865 : {
3866 : double dval;
3867 : unsigned char bytes[9];
3868 : } u;
3869 :
3870 :
3871 : /*
3872 : * skip leading spaces
3873 : */
3874 0 : while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
3875 0 : buf++;
3876 :
3877 : /*
3878 : * Handle special cases
3879 : */
3880 0 : if( *buf=='.' )
3881 : {
3882 0 : const char *s_nan = ".nan_______";
3883 0 : const char *s_posinf = ".posinf____";
3884 0 : const char *s_neginf = ".neginf____";
3885 0 : if( strncmp(buf, s_nan, strlen(s_nan))==0 )
3886 : {
3887 0 : *pasttheend = buf+strlen(s_nan);
3888 0 : return state->v_nan;
3889 : }
3890 0 : if( strncmp(buf, s_posinf, strlen(s_posinf))==0 )
3891 : {
3892 0 : *pasttheend = buf+strlen(s_posinf);
3893 0 : return state->v_posinf;
3894 : }
3895 0 : if( strncmp(buf, s_neginf, strlen(s_neginf))==0 )
3896 : {
3897 0 : *pasttheend = buf+strlen(s_neginf);
3898 0 : return state->v_neginf;
3899 : }
3900 0 : ae_break(state, ERR_ASSERTION_FAILED, emsg);
3901 : }
3902 :
3903 : /*
3904 : * General case:
3905 : * 1. read and decode six-bit digits
3906 : * 2. check that all 11 digits were read
3907 : * 3. set last 12th digit to zero (needed for simplicity of conversion)
3908 : * 4. convert to 8 bytes
3909 : * 5. convert to big endian representation, if needed
3910 : */
3911 0 : sixbitsread = 0;
3912 0 : while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
3913 : {
3914 : ae_int_t d;
3915 0 : d = ae_char2sixbits(*buf);
3916 0 : if( d<0 || sixbitsread>=AE_SER_ENTRY_LENGTH )
3917 0 : ae_break(state, ERR_ASSERTION_FAILED, emsg);
3918 0 : sixbits[sixbitsread] = d;
3919 0 : sixbitsread++;
3920 0 : buf++;
3921 : }
3922 0 : *pasttheend = buf;
3923 0 : if( sixbitsread!=AE_SER_ENTRY_LENGTH )
3924 0 : ae_break(state, ERR_ASSERTION_FAILED, emsg);
3925 0 : sixbits[AE_SER_ENTRY_LENGTH] = 0;
3926 0 : ae_foursixbits2threebytes(sixbits+0, u.bytes+0);
3927 0 : ae_foursixbits2threebytes(sixbits+4, u.bytes+3);
3928 0 : ae_foursixbits2threebytes(sixbits+8, u.bytes+6);
3929 0 : if( state->endianness==AE_BIG_ENDIAN )
3930 : {
3931 0 : for(i=0; i<(ae_int_t)(sizeof(double)/2); i++)
3932 : {
3933 : unsigned char tc;
3934 0 : tc = u.bytes[i];
3935 0 : u.bytes[i] = u.bytes[sizeof(double)-1-i];
3936 0 : u.bytes[sizeof(double)-1-i] = tc;
3937 : }
3938 : }
3939 0 : return u.dval;
3940 : }
3941 :
3942 :
3943 : /************************************************************************
3944 : This function performs given number of spin-wait iterations
3945 : ************************************************************************/
3946 0 : void ae_spin_wait(ae_int_t cnt)
3947 : {
3948 : /*
3949 : * these strange operations with ae_never_change_it are necessary to
3950 : * prevent compiler optimization of the loop.
3951 : */
3952 : volatile ae_int_t i;
3953 :
3954 : /* very unlikely because no one will wait for such amount of cycles */
3955 0 : if( cnt>0x12345678 )
3956 0 : ae_never_change_it = cnt%10;
3957 :
3958 : /* spin wait, test condition which will never be true */
3959 0 : for(i=0; i<cnt; i++)
3960 0 : if( ae_never_change_it>0 )
3961 0 : ae_never_change_it--;
3962 0 : }
3963 :
3964 :
3965 : /************************************************************************
3966 : This function causes the calling thread to relinquish the CPU. The thread
3967 : is moved to the end of the queue and some other thread gets to run.
3968 :
3969 : NOTE: this function should NOT be called when AE_OS is AE_UNKNOWN - the
3970 : whole program will be abnormally terminated.
3971 : ************************************************************************/
3972 0 : void ae_yield()
3973 : {
3974 : #if AE_OS==AE_WINDOWS
3975 : if( !SwitchToThread() )
3976 : Sleep(0);
3977 : #elif AE_OS==AE_POSIX
3978 : sched_yield();
3979 : #else
3980 0 : abort();
3981 : #endif
3982 : }
3983 :
3984 : /************************************************************************
3985 : This function initializes _lock structure which is internally used by
3986 : ae_lock high-level structure.
3987 :
3988 : _lock structure is statically allocated, no malloc() calls is performed
3989 : during its allocation. However, you have to call _ae_free_lock_raw() in
3990 : order to deallocate this lock properly.
3991 : ************************************************************************/
3992 0 : void _ae_init_lock_raw(_lock *p)
3993 : {
3994 : #if AE_OS==AE_WINDOWS
3995 : p->p_lock = (ae_int_t*)ae_align((void*)(&p->buf),AE_LOCK_ALIGNMENT);
3996 : p->p_lock[0] = 0;
3997 : #elif AE_OS==AE_POSIX
3998 : pthread_mutex_init(&p->mutex, NULL);
3999 : #else
4000 0 : p->is_locked = ae_false;
4001 : #endif
4002 0 : }
4003 :
4004 :
4005 : /************************************************************************
4006 : This function acquires _lock structure.
4007 :
4008 : It is low-level workhorse utilized by ae_acquire_lock().
4009 : ************************************************************************/
4010 0 : void _ae_acquire_lock_raw(_lock *p)
4011 : {
4012 : #if AE_OS==AE_WINDOWS
4013 : ae_int_t cnt = 0;
4014 : #ifdef AE_SMP_DEBUGCOUNTERS
4015 : InterlockedIncrement((LONG volatile *)&_ae_dbg_lock_acquisitions);
4016 : #endif
4017 : for(;;)
4018 : {
4019 : if( InterlockedCompareExchange((LONG volatile *)p->p_lock, 1, 0)==0 )
4020 : return;
4021 : ae_spin_wait(AE_LOCK_CYCLES);
4022 : #ifdef AE_SMP_DEBUGCOUNTERS
4023 : InterlockedIncrement((LONG volatile *)&_ae_dbg_lock_spinwaits);
4024 : #endif
4025 : cnt++;
4026 : if( cnt%AE_LOCK_TESTS_BEFORE_YIELD==0 )
4027 : {
4028 : #ifdef AE_SMP_DEBUGCOUNTERS
4029 : InterlockedIncrement((LONG volatile *)&_ae_dbg_lock_yields);
4030 : #endif
4031 : ae_yield();
4032 : }
4033 : }
4034 : #elif AE_OS==AE_POSIX
4035 : ae_int_t cnt = 0;
4036 : for(;;)
4037 : {
4038 : if( pthread_mutex_trylock(&p->mutex)==0 )
4039 : return;
4040 : ae_spin_wait(AE_LOCK_CYCLES);
4041 : cnt++;
4042 : if( cnt%AE_LOCK_TESTS_BEFORE_YIELD==0 )
4043 : ae_yield();
4044 : }
4045 : ;
4046 : #else
4047 0 : AE_CRITICAL_ASSERT(!p->is_locked);
4048 0 : p->is_locked = ae_true;
4049 : #endif
4050 0 : }
4051 :
4052 :
4053 : /************************************************************************
4054 : This function releases _lock structure.
4055 :
4056 : It is low-level lock function which is used by ae_release_lock.
4057 : ************************************************************************/
4058 0 : void _ae_release_lock_raw(_lock *p)
4059 : {
4060 : #if AE_OS==AE_WINDOWS
4061 : InterlockedExchange((LONG volatile *)p->p_lock, 0);
4062 : #elif AE_OS==AE_POSIX
4063 : pthread_mutex_unlock(&p->mutex);
4064 : #else
4065 0 : p->is_locked = ae_false;
4066 : #endif
4067 0 : }
4068 :
4069 :
4070 : /************************************************************************
4071 : This function frees _lock structure.
4072 : ************************************************************************/
4073 0 : void _ae_free_lock_raw(_lock *p)
4074 : {
4075 : #if AE_OS==AE_POSIX
4076 : pthread_mutex_destroy(&p->mutex);
4077 : #endif
4078 0 : }
4079 :
4080 :
4081 : /************************************************************************
4082 : This function initializes ae_lock structure.
4083 :
4084 : INPUT PARAMETERS:
4085 : lock - pointer to lock structure, must be zero-filled
4086 : state - pointer to state structure, used for exception
4087 : handling and management of automatic objects.
4088 : make_automatic - if true, lock object is added to automatic
4089 : memory management list.
4090 :
4091 : NOTE: as a special exception, this function allows you to specify NULL
4092 : state pointer. In this case all exception arising during construction
4093 : are handled as critical failures, with abort() being called.
4094 : make_automatic must be false on such calls.
4095 : ************************************************************************/
4096 0 : void ae_init_lock(ae_lock *lock, ae_state *state, ae_bool make_automatic)
4097 : {
4098 : _lock *p;
4099 0 : AE_CRITICAL_ASSERT(ae_check_zeros(lock,sizeof(*lock)));
4100 0 : if(state==NULL)
4101 : {
4102 : ae_state _tmp_state;
4103 0 : AE_CRITICAL_ASSERT(!make_automatic);
4104 0 : ae_state_init(&_tmp_state);
4105 0 : ae_init_lock(lock, &_tmp_state, ae_false);
4106 0 : ae_state_clear(&_tmp_state);
4107 0 : return;
4108 : }
4109 0 : lock->eternal = ae_false;
4110 0 : ae_db_init(&lock->db, sizeof(_lock), state, make_automatic);
4111 0 : lock->lock_ptr = lock->db.ptr;
4112 0 : p = (_lock*)lock->lock_ptr;
4113 0 : _ae_init_lock_raw(p);
4114 : }
4115 :
4116 : /************************************************************************
4117 : This function initializes "eternal" ae_lock structure which is expected
4118 : to persist until the end of the execution of the program. Eternal locks
4119 : can not be deallocated (cleared) and do not increase debug allocation
4120 : counters. Errors during allocation of eternal locks are considered
4121 : critical exceptions and handled by calling abort().
4122 :
4123 : INPUT PARAMETERS:
4124 : lock - pointer to lock structure, must be zero-filled
4125 : state - pointer to state structure, used for exception
4126 : handling and management of automatic objects;
4127 : non-NULL.
4128 : make_automatic - if true, lock object is added to automatic
4129 : memory management list.
4130 : ************************************************************************/
4131 0 : void ae_init_lock_eternal(ae_lock *lock)
4132 : {
4133 : _lock *p;
4134 0 : AE_CRITICAL_ASSERT(ae_check_zeros(lock,sizeof(*lock)));
4135 0 : lock->eternal = ae_true;
4136 0 : lock->lock_ptr = eternal_malloc(sizeof(_lock));
4137 0 : p = (_lock*)lock->lock_ptr;
4138 0 : _ae_init_lock_raw(p);
4139 0 : }
4140 :
4141 :
4142 : /************************************************************************
4143 : This function acquires lock. In case lock is busy, we perform several
4144 : iterations inside tight loop before trying again.
4145 : ************************************************************************/
4146 0 : void ae_acquire_lock(ae_lock *lock)
4147 : {
4148 : _lock *p;
4149 0 : p = (_lock*)lock->lock_ptr;
4150 0 : _ae_acquire_lock_raw(p);
4151 0 : }
4152 :
4153 :
4154 : /************************************************************************
4155 : This function releases lock.
4156 : ************************************************************************/
4157 0 : void ae_release_lock(ae_lock *lock)
4158 : {
4159 : _lock *p;
4160 0 : p = (_lock*)lock->lock_ptr;
4161 0 : _ae_release_lock_raw(p);
4162 0 : }
4163 :
4164 :
4165 : /************************************************************************
4166 : This function frees ae_lock structure.
4167 : ************************************************************************/
4168 0 : void ae_free_lock(ae_lock *lock)
4169 : {
4170 : _lock *p;
4171 0 : AE_CRITICAL_ASSERT(!lock->eternal);
4172 0 : p = (_lock*)lock->lock_ptr;
4173 0 : if( p!=NULL )
4174 0 : _ae_free_lock_raw(p);
4175 0 : ae_db_free(&lock->db);
4176 0 : }
4177 :
4178 :
4179 : /************************************************************************
4180 : This function creates ae_shared_pool structure.
4181 :
4182 : dst destination shared pool, must be zero-filled
4183 : already allocated, but not initialized.
4184 : state pointer to current state structure. Can not be NULL.
4185 : used for exception handling (say, allocation error results
4186 : in longjmp call).
4187 : make_automatic if true, vector will be registered in the current frame
4188 : of the state structure;
4189 :
4190 : Error handling:
4191 : * on failure calls ae_break() with NULL state pointer. Usually it results
4192 : in abort() call.
4193 :
4194 : dst is assumed to be uninitialized, its fields are ignored.
4195 : ************************************************************************/
4196 0 : void ae_shared_pool_init(void *_dst, ae_state *state, ae_bool make_automatic)
4197 : {
4198 : ae_shared_pool *dst;
4199 :
4200 0 : AE_CRITICAL_ASSERT(state!=NULL);
4201 0 : dst = (ae_shared_pool*)_dst;
4202 0 : AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
4203 :
4204 : /* init */
4205 0 : dst->seed_object = NULL;
4206 0 : dst->recycled_objects = NULL;
4207 0 : dst->recycled_entries = NULL;
4208 0 : dst->enumeration_counter = NULL;
4209 0 : dst->size_of_object = 0;
4210 0 : dst->init = NULL;
4211 0 : dst->init_copy = NULL;
4212 0 : dst->destroy = NULL;
4213 0 : dst->frame_entry.deallocator = ae_shared_pool_destroy;
4214 0 : dst->frame_entry.ptr = dst;
4215 0 : if( make_automatic )
4216 0 : ae_db_attach(&dst->frame_entry, state);
4217 0 : ae_init_lock(&dst->pool_lock, state, ae_false);
4218 0 : }
4219 :
4220 :
4221 : /************************************************************************
4222 : This function clears all dynamically allocated fields of the pool except
4223 : for the lock. It does NOT try to acquire pool_lock.
4224 :
4225 : NOTE: this function is NOT thread-safe, it is not protected by lock.
4226 : ************************************************************************/
4227 0 : static void ae_shared_pool_internalclear(ae_shared_pool *dst)
4228 : {
4229 : ae_shared_pool_entry *ptr, *tmp;
4230 :
4231 : /* destroy seed */
4232 0 : if( dst->seed_object!=NULL )
4233 : {
4234 0 : dst->destroy((void*)dst->seed_object);
4235 0 : ae_free((void*)dst->seed_object);
4236 0 : dst->seed_object = NULL;
4237 : }
4238 :
4239 : /* destroy recycled objects */
4240 0 : for(ptr=dst->recycled_objects; ptr!=NULL;)
4241 : {
4242 0 : tmp = (ae_shared_pool_entry*)ptr->next_entry;
4243 0 : dst->destroy(ptr->obj);
4244 0 : ae_free(ptr->obj);
4245 0 : ae_free(ptr);
4246 0 : ptr = tmp;
4247 : }
4248 0 : dst->recycled_objects = NULL;
4249 :
4250 : /* destroy recycled entries */
4251 0 : for(ptr=dst->recycled_entries; ptr!=NULL;)
4252 : {
4253 0 : tmp = (ae_shared_pool_entry*)ptr->next_entry;
4254 0 : ae_free(ptr);
4255 0 : ptr = tmp;
4256 : }
4257 0 : dst->recycled_entries = NULL;
4258 0 : }
4259 :
4260 :
4261 : /************************************************************************
4262 : This function creates copy of ae_shared_pool.
4263 :
4264 : dst destination pool, must be zero-filled
4265 : src source pool
4266 : state pointer to current state structure. Can not be NULL.
4267 : used for exception handling (say, allocation error results
4268 : in longjmp call).
4269 : make_automatic if true, vector will be registered in the current frame
4270 : of the state structure;
4271 :
4272 : dst is assumed to be uninitialized, its fields are ignored.
4273 :
4274 : NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
4275 : you should NOT call it when lock can be used by another thread.
4276 : ************************************************************************/
4277 0 : void ae_shared_pool_init_copy(void *_dst, void *_src, ae_state *state, ae_bool make_automatic)
4278 : {
4279 : ae_shared_pool *dst, *src;
4280 : ae_shared_pool_entry *ptr;
4281 :
4282 : /* state!=NULL, allocation errors result in exception */
4283 : /* AE_CRITICAL_ASSERT(state!=NULL); */
4284 :
4285 0 : dst = (ae_shared_pool*)_dst;
4286 0 : src = (ae_shared_pool*)_src;
4287 0 : ae_shared_pool_init(dst, state, make_automatic);
4288 :
4289 : /* copy non-pointer fields */
4290 0 : dst->size_of_object = src->size_of_object;
4291 0 : dst->init = src->init;
4292 0 : dst->init_copy = src->init_copy;
4293 0 : dst->destroy = src->destroy;
4294 :
4295 : /* copy seed object */
4296 0 : if( src->seed_object!=NULL )
4297 : {
4298 0 : dst->seed_object = ae_malloc(dst->size_of_object, state);
4299 0 : memset(dst->seed_object, 0, dst->size_of_object);
4300 0 : dst->init_copy(dst->seed_object, src->seed_object, state, ae_false);
4301 : }
4302 :
4303 : /* copy recycled objects */
4304 0 : dst->recycled_objects = NULL;
4305 0 : for(ptr=src->recycled_objects; ptr!=NULL; ptr=(ae_shared_pool_entry*)ptr->next_entry)
4306 : {
4307 : ae_shared_pool_entry *tmp;
4308 :
4309 : /* allocate entry, immediately add to the recycled list
4310 : (we do not want to lose it in case of future malloc failures) */
4311 0 : tmp = (ae_shared_pool_entry*)ae_malloc(sizeof(ae_shared_pool_entry), state);
4312 0 : memset(tmp, 0, sizeof(*tmp));
4313 0 : tmp->next_entry = dst->recycled_objects;
4314 0 : dst->recycled_objects = tmp;
4315 :
4316 : /* prepare place for object, init_copy() it */
4317 0 : tmp->obj = ae_malloc(dst->size_of_object, state);
4318 0 : memset(tmp->obj, 0, dst->size_of_object);
4319 0 : dst->init_copy(tmp->obj, ptr->obj, state, ae_false);
4320 : }
4321 :
4322 : /* recycled entries are not copied because they do not store any information */
4323 0 : dst->recycled_entries = NULL;
4324 :
4325 : /* enumeration counter is reset on copying */
4326 0 : dst->enumeration_counter = NULL;
4327 :
4328 : /* initialize frame record */
4329 0 : dst->frame_entry.deallocator = ae_shared_pool_destroy;
4330 0 : dst->frame_entry.ptr = dst;
4331 0 : }
4332 :
4333 :
4334 : /************************************************************************
4335 : This function performs destruction of the pool object.
4336 :
4337 : NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
4338 : you should NOT call it when pool can be used by another thread.
4339 : ************************************************************************/
4340 0 : void ae_shared_pool_clear(void *_dst)
4341 : {
4342 0 : ae_shared_pool *dst = (ae_shared_pool*)_dst;
4343 :
4344 : /* clear seed and lists */
4345 0 : ae_shared_pool_internalclear(dst);
4346 :
4347 : /* clear fields */
4348 0 : dst->seed_object = NULL;
4349 0 : dst->recycled_objects = NULL;
4350 0 : dst->recycled_entries = NULL;
4351 0 : dst->enumeration_counter = NULL;
4352 0 : dst->size_of_object = 0;
4353 0 : dst->init = NULL;
4354 0 : dst->init_copy = NULL;
4355 0 : dst->destroy = NULL;
4356 0 : }
4357 :
4358 0 : void ae_shared_pool_destroy(void *_dst)
4359 : {
4360 0 : ae_shared_pool *dst = (ae_shared_pool*)_dst;
4361 0 : ae_shared_pool_clear(_dst);
4362 0 : ae_free_lock(&dst->pool_lock);
4363 0 : }
4364 :
4365 :
4366 : /************************************************************************
4367 : This function returns True, if internal seed object was set. It returns
4368 : False for un-seeded pool.
4369 :
4370 : dst destination pool (initialized by constructor function)
4371 :
4372 : NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
4373 : you should NOT call it when lock can be used by another thread.
4374 : ************************************************************************/
4375 0 : ae_bool ae_shared_pool_is_initialized(void *_dst)
4376 : {
4377 0 : ae_shared_pool *dst = (ae_shared_pool*)_dst;
4378 0 : return dst->seed_object!=NULL;
4379 : }
4380 :
4381 :
4382 : /************************************************************************
4383 : This function sets internal seed object. All objects owned by the pool
4384 : (current seed object, recycled objects) are automatically freed.
4385 :
4386 : dst destination pool (initialized by constructor function)
4387 : seed_object new seed object
4388 : size_of_object sizeof(), used to allocate memory
4389 : init constructor function
4390 : init_copy copy constructor
4391 : clear destructor function
4392 : state ALGLIB environment state
4393 :
4394 : NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
4395 : you should NOT call it when lock can be used by another thread.
4396 : ************************************************************************/
4397 0 : void ae_shared_pool_set_seed(
4398 : ae_shared_pool *dst,
4399 : void *seed_object,
4400 : ae_int_t size_of_object,
4401 : void (*init)(void* dst, ae_state* state, ae_bool make_automatic),
4402 : void (*init_copy)(void* dst, void* src, ae_state* state, ae_bool make_automatic),
4403 : void (*destroy)(void* ptr),
4404 : ae_state *state)
4405 : {
4406 : /* state!=NULL, allocation errors result in exception */
4407 0 : AE_CRITICAL_ASSERT(state!=NULL);
4408 :
4409 : /* destroy internal objects */
4410 0 : ae_shared_pool_internalclear(dst);
4411 :
4412 : /* set non-pointer fields */
4413 0 : dst->size_of_object = size_of_object;
4414 0 : dst->init = init;
4415 0 : dst->init_copy = init_copy;
4416 0 : dst->destroy = destroy;
4417 :
4418 : /* set seed object */
4419 0 : dst->seed_object = ae_malloc(size_of_object, state);
4420 0 : memset(dst->seed_object, 0, size_of_object);
4421 0 : init_copy(dst->seed_object, seed_object, state, ae_false);
4422 0 : }
4423 :
4424 :
4425 : /************************************************************************
4426 : This function retrieves a copy of the seed object from the pool and
4427 : stores it to target smart pointer ptr.
4428 :
4429 : In case target pointer owns non-NULL value, it is deallocated before
4430 : storing value retrieved from pool. Target pointer becomes owner of the
4431 : value which was retrieved from pool.
4432 :
4433 : pool pool
4434 : pptr pointer to ae_smart_ptr structure
4435 : state ALGLIB environment state
4436 :
4437 : NOTE: this function IS thread-safe. It acquires pool lock during its
4438 : operation and can be used simultaneously from several threads.
4439 : ************************************************************************/
4440 0 : void ae_shared_pool_retrieve(
4441 : ae_shared_pool *pool,
4442 : ae_smart_ptr *pptr,
4443 : ae_state *state)
4444 : {
4445 : void *new_obj;
4446 :
4447 : /* state!=NULL, allocation errors are handled by throwing exception from ae_malloc() */
4448 0 : AE_CRITICAL_ASSERT(state!=NULL);
4449 :
4450 : /* assert that pool was seeded */
4451 0 : ae_assert(
4452 0 : pool->seed_object!=NULL,
4453 : "ALGLIB: shared pool is not seeded, PoolRetrieve() failed",
4454 : state);
4455 :
4456 : /* acquire lock */
4457 0 : ae_acquire_lock(&pool->pool_lock);
4458 :
4459 : /* try to reuse recycled objects */
4460 0 : if( pool->recycled_objects!=NULL )
4461 : {
4462 : ae_shared_pool_entry *result;
4463 :
4464 : /* retrieve entry/object from list of recycled objects */
4465 0 : result = pool->recycled_objects;
4466 0 : pool->recycled_objects = (ae_shared_pool_entry*)pool->recycled_objects->next_entry;
4467 0 : new_obj = result->obj;
4468 0 : result->obj = NULL;
4469 :
4470 : /* move entry to list of recycled entries */
4471 0 : result->next_entry = pool->recycled_entries;
4472 0 : pool->recycled_entries = result;
4473 :
4474 : /* release lock */
4475 0 : ae_release_lock(&pool->pool_lock);
4476 :
4477 : /* assign object to smart pointer */
4478 0 : ae_smart_ptr_assign(pptr, new_obj, ae_true, ae_true, pool->destroy);
4479 0 : return;
4480 : }
4481 :
4482 : /* release lock; we do not need it anymore because copy constructor does not modify source variable */
4483 0 : ae_release_lock(&pool->pool_lock);
4484 :
4485 : /* create new object from seed, immediately assign object to smart pointer
4486 : (do not want to lose it in case of future failures) */
4487 0 : new_obj = ae_malloc(pool->size_of_object, state);
4488 0 : memset(new_obj, 0, pool->size_of_object);
4489 0 : ae_smart_ptr_assign(pptr, new_obj, ae_true, ae_true, pool->destroy);
4490 :
4491 : /* perform actual copying; before this line smartptr points to zero-filled instance */
4492 0 : pool->init_copy(new_obj, pool->seed_object, state, ae_false);
4493 : }
4494 :
4495 :
4496 : /************************************************************************
4497 : This function recycles object owned by smart pointer by moving it to
4498 : internal storage of the shared pool.
4499 :
4500 : Source pointer must own the object. After function is over, it owns NULL
4501 : pointer.
4502 :
4503 : pool pool
4504 : pptr pointer to ae_smart_ptr structure
4505 : state ALGLIB environment state
4506 :
4507 : NOTE: this function IS thread-safe. It acquires pool lock during its
4508 : operation and can be used simultaneously from several threads.
4509 : ************************************************************************/
4510 0 : void ae_shared_pool_recycle(
4511 : ae_shared_pool *pool,
4512 : ae_smart_ptr *pptr,
4513 : ae_state *state)
4514 : {
4515 : ae_shared_pool_entry *new_entry;
4516 :
4517 : /* state!=NULL, allocation errors are handled by throwing exception from ae_malloc() */
4518 0 : AE_CRITICAL_ASSERT(state!=NULL);
4519 :
4520 : /* assert that pool was seeded */
4521 0 : ae_assert(
4522 0 : pool->seed_object!=NULL,
4523 : "ALGLIB: shared pool is not seeded, PoolRecycle() failed",
4524 : state);
4525 :
4526 : /* assert that pointer non-null and owns the object */
4527 0 : ae_assert(pptr->is_owner, "ALGLIB: pptr in ae_shared_pool_recycle() does not own its pointer", state);
4528 0 : ae_assert(pptr->ptr!=NULL, "ALGLIB: pptr in ae_shared_pool_recycle() is NULL", state);
4529 :
4530 : /* acquire lock */
4531 0 : ae_acquire_lock(&pool->pool_lock);
4532 :
4533 : /* acquire shared pool entry (reuse one from recycled_entries or allocate new one) */
4534 0 : if( pool->recycled_entries!=NULL )
4535 : {
4536 : /* reuse previously allocated entry */
4537 0 : new_entry = pool->recycled_entries;
4538 0 : pool->recycled_entries = (ae_shared_pool_entry*)new_entry->next_entry;
4539 : }
4540 : else
4541 : {
4542 : /*
4543 : * Allocate memory for new entry.
4544 : *
4545 : * NOTE: we release pool lock during allocation because ae_malloc() may raise
4546 : * exception and we do not want our pool to be left in the locked state.
4547 : */
4548 0 : ae_release_lock(&pool->pool_lock);
4549 0 : new_entry = (ae_shared_pool_entry*)ae_malloc(sizeof(ae_shared_pool_entry), state);
4550 0 : ae_acquire_lock(&pool->pool_lock);
4551 : }
4552 :
4553 : /* add object to the list of recycled objects */
4554 0 : new_entry->obj = pptr->ptr;
4555 0 : new_entry->next_entry = pool->recycled_objects;
4556 0 : pool->recycled_objects = new_entry;
4557 :
4558 : /* release lock object */
4559 0 : ae_release_lock(&pool->pool_lock);
4560 :
4561 : /* release source pointer */
4562 0 : ae_smart_ptr_release(pptr);
4563 0 : }
4564 :
4565 :
4566 : /************************************************************************
4567 : This function clears internal list of recycled objects, but does not
4568 : change seed object managed by the pool.
4569 :
4570 : pool pool
4571 : state ALGLIB environment state
4572 :
4573 : NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
4574 : you should NOT call it when lock can be used by another thread.
4575 : ************************************************************************/
4576 0 : void ae_shared_pool_clear_recycled(
4577 : ae_shared_pool *pool,
4578 : ae_state *state)
4579 : {
4580 : ae_shared_pool_entry *ptr, *tmp;
4581 :
4582 : /* clear recycled objects */
4583 0 : for(ptr=pool->recycled_objects; ptr!=NULL;)
4584 : {
4585 0 : tmp = (ae_shared_pool_entry*)ptr->next_entry;
4586 0 : pool->destroy(ptr->obj);
4587 0 : ae_free(ptr->obj);
4588 0 : ae_free(ptr);
4589 0 : ptr = tmp;
4590 : }
4591 0 : pool->recycled_objects = NULL;
4592 0 : }
4593 :
4594 :
4595 : /************************************************************************
4596 : This function allows to enumerate recycled elements of the shared pool.
4597 : It stores pointer to the first recycled object in the smart pointer.
4598 :
4599 : IMPORTANT:
4600 : * in case target pointer owns non-NULL value, it is deallocated before
4601 : storing value retrieved from pool.
4602 : * recycled object IS NOT removed from pool
4603 : * target pointer DOES NOT become owner of the new value
4604 : * this function IS NOT thread-safe
4605 : * you SHOULD NOT modify shared pool during enumeration (although you can
4606 : modify state of the objects retrieved from pool)
4607 : * in case there is no recycled objects in the pool, NULL is stored to pptr
4608 : * in case pool is not seeded, NULL is stored to pptr
4609 :
4610 : pool pool
4611 : pptr pointer to ae_smart_ptr structure
4612 : state ALGLIB environment state
4613 : ************************************************************************/
4614 0 : void ae_shared_pool_first_recycled(
4615 : ae_shared_pool *pool,
4616 : ae_smart_ptr *pptr,
4617 : ae_state *state)
4618 : {
4619 : /* modify internal enumeration counter */
4620 0 : pool->enumeration_counter = pool->recycled_objects;
4621 :
4622 : /* exit on empty list */
4623 0 : if( pool->enumeration_counter==NULL )
4624 : {
4625 0 : ae_smart_ptr_assign(pptr, NULL, ae_false, ae_false, NULL);
4626 0 : return;
4627 : }
4628 :
4629 : /* assign object to smart pointer */
4630 0 : ae_smart_ptr_assign(pptr, pool->enumeration_counter->obj, ae_false, ae_false, pool->destroy);
4631 : }
4632 :
4633 :
4634 : /************************************************************************
4635 : This function allows to enumerate recycled elements of the shared pool.
4636 : It stores pointer to the next recycled object in the smart pointer.
4637 :
4638 : IMPORTANT:
4639 : * in case target pointer owns non-NULL value, it is deallocated before
4640 : storing value retrieved from pool.
4641 : * recycled object IS NOT removed from pool
4642 : * target pointer DOES NOT become owner of the new value
4643 : * this function IS NOT thread-safe
4644 : * you SHOULD NOT modify shared pool during enumeration (although you can
4645 : modify state of the objects retrieved from pool)
4646 : * in case there is no recycled objects left in the pool, NULL is stored.
4647 : * in case pool is not seeded, NULL is stored.
4648 :
4649 : pool pool
4650 : pptr pointer to ae_smart_ptr structure
4651 : state ALGLIB environment state
4652 : ************************************************************************/
4653 0 : void ae_shared_pool_next_recycled(
4654 : ae_shared_pool *pool,
4655 : ae_smart_ptr *pptr,
4656 : ae_state *state)
4657 : {
4658 : /* exit on end of list */
4659 0 : if( pool->enumeration_counter==NULL )
4660 : {
4661 0 : ae_smart_ptr_assign(pptr, NULL, ae_false, ae_false, NULL);
4662 0 : return;
4663 : }
4664 :
4665 : /* modify internal enumeration counter */
4666 0 : pool->enumeration_counter = (ae_shared_pool_entry*)pool->enumeration_counter->next_entry;
4667 :
4668 : /* exit on empty list */
4669 0 : if( pool->enumeration_counter==NULL )
4670 : {
4671 0 : ae_smart_ptr_assign(pptr, NULL, ae_false, ae_false, NULL);
4672 0 : return;
4673 : }
4674 :
4675 : /* assign object to smart pointer */
4676 0 : ae_smart_ptr_assign(pptr, pool->enumeration_counter->obj, ae_false, ae_false, pool->destroy);
4677 : }
4678 :
4679 :
4680 :
4681 : /************************************************************************
4682 : This function clears internal list of recycled objects and seed object.
4683 : However, pool still can be used (after initialization with another seed).
4684 :
4685 : pool pool
4686 : state ALGLIB environment state
4687 :
4688 : NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
4689 : you should NOT call it when lock can be used by another thread.
4690 : ************************************************************************/
4691 0 : void ae_shared_pool_reset(
4692 : ae_shared_pool *pool,
4693 : ae_state *state)
4694 : {
4695 : /* clear seed and lists */
4696 0 : ae_shared_pool_internalclear(pool);
4697 :
4698 : /* clear fields */
4699 0 : pool->seed_object = NULL;
4700 0 : pool->recycled_objects = NULL;
4701 0 : pool->recycled_entries = NULL;
4702 0 : pool->enumeration_counter = NULL;
4703 0 : pool->size_of_object = 0;
4704 0 : pool->init = NULL;
4705 0 : pool->init_copy = NULL;
4706 0 : pool->destroy = NULL;
4707 0 : }
4708 :
4709 :
4710 : /************************************************************************
4711 : This function initializes serializer
4712 : ************************************************************************/
4713 0 : void ae_serializer_init(ae_serializer *serializer)
4714 : {
4715 0 : serializer->mode = AE_SM_DEFAULT;
4716 0 : serializer->entries_needed = 0;
4717 0 : serializer->bytes_asked = 0;
4718 0 : }
4719 :
4720 0 : void ae_serializer_clear(ae_serializer *serializer)
4721 : {
4722 0 : }
4723 :
4724 0 : void ae_serializer_alloc_start(ae_serializer *serializer)
4725 : {
4726 0 : serializer->entries_needed = 0;
4727 0 : serializer->bytes_asked = 0;
4728 0 : serializer->mode = AE_SM_ALLOC;
4729 0 : }
4730 :
4731 0 : void ae_serializer_alloc_entry(ae_serializer *serializer)
4732 : {
4733 0 : serializer->entries_needed++;
4734 0 : }
4735 :
4736 0 : void ae_serializer_alloc_byte_array(ae_serializer *serializer, ae_vector *bytes)
4737 : {
4738 : ae_int_t n;
4739 0 : n = bytes->cnt;
4740 0 : n = n/8 + (n%8>0 ? 1 : 0);
4741 0 : serializer->entries_needed += 1+n;
4742 0 : }
4743 :
4744 : /************************************************************************
4745 : After allocation phase is done, this function returns required size of
4746 : the output string buffer (including trailing zero symbol). Actual size of
4747 : the data being stored can be a few characters smaller than requested.
4748 : ************************************************************************/
4749 0 : ae_int_t ae_serializer_get_alloc_size(ae_serializer *serializer)
4750 : {
4751 : ae_int_t rows, lastrowsize, result;
4752 :
4753 0 : serializer->mode = AE_SM_READY2S;
4754 :
4755 : /* if no entries needes (degenerate case) */
4756 0 : if( serializer->entries_needed==0 )
4757 : {
4758 0 : serializer->bytes_asked = 4; /* a pair of chars for \r\n, one for dot, one for trailing zero */
4759 0 : return serializer->bytes_asked;
4760 : }
4761 :
4762 : /* non-degenerate case */
4763 0 : rows = serializer->entries_needed/AE_SER_ENTRIES_PER_ROW;
4764 0 : lastrowsize = AE_SER_ENTRIES_PER_ROW;
4765 0 : if( serializer->entries_needed%AE_SER_ENTRIES_PER_ROW )
4766 : {
4767 0 : lastrowsize = serializer->entries_needed%AE_SER_ENTRIES_PER_ROW;
4768 0 : rows++;
4769 : }
4770 :
4771 : /* calculate result size */
4772 0 : result = ((rows-1)*AE_SER_ENTRIES_PER_ROW+lastrowsize)*AE_SER_ENTRY_LENGTH; /* data size */
4773 0 : result += (rows-1)*(AE_SER_ENTRIES_PER_ROW-1)+(lastrowsize-1); /* space symbols */
4774 0 : result += rows*2; /* newline symbols */
4775 0 : result += 1; /* trailing dot */
4776 0 : result += 1; /* trailing zero */
4777 0 : serializer->bytes_asked = result;
4778 0 : return result;
4779 : }
4780 :
4781 : #ifdef AE_USE_CPP_SERIALIZATION
4782 0 : void ae_serializer_sstart_str(ae_serializer *serializer, std::string *buf)
4783 : {
4784 0 : serializer->mode = AE_SM_TO_CPPSTRING;
4785 0 : serializer->out_cppstr = buf;
4786 0 : serializer->entries_saved = 0;
4787 0 : serializer->bytes_written = 0;
4788 0 : }
4789 :
4790 0 : void ae_serializer_ustart_str(ae_serializer *serializer, const std::string *buf)
4791 : {
4792 0 : serializer->mode = AE_SM_FROM_STRING;
4793 0 : serializer->in_str = buf->c_str();
4794 0 : }
4795 :
4796 0 : static char cpp_writer(const char *p_string, ae_int_t aux)
4797 : {
4798 0 : std::ostream *stream = reinterpret_cast<std::ostream*>(aux);
4799 0 : stream->write(p_string, strlen(p_string));
4800 0 : return stream->bad() ? 1 : 0;
4801 : }
4802 :
4803 0 : static char cpp_reader(ae_int_t aux, ae_int_t cnt, char *p_buf)
4804 : {
4805 0 : std::istream *stream = reinterpret_cast<std::istream*>(aux);
4806 : int c;
4807 0 : if( cnt<=0 )
4808 0 : return 1; /* unexpected cnt */
4809 : for(;;)
4810 : {
4811 0 : c = stream->get();
4812 0 : if( c<0 || c>255 )
4813 0 : return 1; /* failure! */
4814 0 : if( c!=' ' && c!='\t' && c!='\n' && c!='\r' )
4815 0 : break;
4816 : }
4817 0 : p_buf[0] = (char)c;
4818 0 : for(int k=1; k<cnt; k++)
4819 : {
4820 0 : c = stream->get();
4821 0 : if( c<0 || c>255 || c==' ' || c=='\t' || c=='\n' || c=='\r' )
4822 0 : return 1; /* failure! */
4823 0 : p_buf[k] = (char)c;
4824 : }
4825 0 : p_buf[cnt] = 0;
4826 0 : return 0; /* success */
4827 : }
4828 :
4829 0 : void ae_serializer_sstart_stream(ae_serializer *serializer, std::ostream *stream)
4830 : {
4831 0 : serializer->mode = AE_SM_TO_STREAM;
4832 0 : serializer->stream_writer = cpp_writer;
4833 0 : serializer->stream_aux = reinterpret_cast<ae_int_t>(stream);
4834 0 : serializer->entries_saved = 0;
4835 0 : serializer->bytes_written = 0;
4836 0 : }
4837 :
4838 0 : void ae_serializer_ustart_stream(ae_serializer *serializer, const std::istream *stream)
4839 : {
4840 0 : serializer->mode = AE_SM_FROM_STREAM;
4841 0 : serializer->stream_reader = cpp_reader;
4842 0 : serializer->stream_aux = reinterpret_cast<ae_int_t>(stream);
4843 0 : }
4844 : #endif
4845 :
4846 0 : void ae_serializer_sstart_str(ae_serializer *serializer, char *buf)
4847 : {
4848 0 : serializer->mode = AE_SM_TO_STRING;
4849 0 : serializer->out_str = buf;
4850 0 : serializer->out_str[0] = 0;
4851 0 : serializer->entries_saved = 0;
4852 0 : serializer->bytes_written = 0;
4853 0 : }
4854 :
4855 0 : void ae_serializer_ustart_str(ae_serializer *serializer, const char *buf)
4856 : {
4857 0 : serializer->mode = AE_SM_FROM_STRING;
4858 0 : serializer->in_str = buf;
4859 0 : }
4860 :
4861 0 : void ae_serializer_sstart_stream(ae_serializer *serializer, ae_stream_writer writer, ae_int_t aux)
4862 : {
4863 0 : serializer->mode = AE_SM_TO_STREAM;
4864 0 : serializer->stream_writer = writer;
4865 0 : serializer->stream_aux = aux;
4866 0 : serializer->entries_saved = 0;
4867 0 : serializer->bytes_written = 0;
4868 0 : }
4869 :
4870 0 : void ae_serializer_ustart_stream(ae_serializer *serializer, ae_stream_reader reader, ae_int_t aux)
4871 : {
4872 0 : serializer->mode = AE_SM_FROM_STREAM;
4873 0 : serializer->stream_reader = reader;
4874 0 : serializer->stream_aux = aux;
4875 0 : }
4876 :
4877 0 : void ae_serializer_serialize_bool(ae_serializer *serializer, ae_bool v, ae_state *state)
4878 : {
4879 : char buf[AE_SER_ENTRY_LENGTH+2+1];
4880 0 : const char *emsg = "ALGLIB: serialization integrity error";
4881 : ae_int_t bytes_appended;
4882 :
4883 : /* prepare serialization, check consistency */
4884 0 : ae_bool2str(v, buf, state);
4885 0 : serializer->entries_saved++;
4886 0 : if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
4887 0 : strcat(buf, " ");
4888 : else
4889 0 : strcat(buf, "\r\n");
4890 0 : bytes_appended = (ae_int_t)strlen(buf);
4891 0 : ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
4892 0 : serializer->bytes_written += bytes_appended;
4893 :
4894 : /* append to buffer */
4895 : #ifdef AE_USE_CPP_SERIALIZATION
4896 0 : if( serializer->mode==AE_SM_TO_CPPSTRING )
4897 : {
4898 0 : *(serializer->out_cppstr) += buf;
4899 0 : return;
4900 : }
4901 : #endif
4902 0 : if( serializer->mode==AE_SM_TO_STRING )
4903 : {
4904 0 : strcat(serializer->out_str, buf);
4905 0 : serializer->out_str += bytes_appended;
4906 0 : return;
4907 : }
4908 0 : if( serializer->mode==AE_SM_TO_STREAM )
4909 : {
4910 0 : ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
4911 0 : return;
4912 : }
4913 0 : ae_break(state, ERR_ASSERTION_FAILED, emsg);
4914 : }
4915 :
4916 0 : void ae_serializer_serialize_int(ae_serializer *serializer, ae_int_t v, ae_state *state)
4917 : {
4918 : char buf[AE_SER_ENTRY_LENGTH+2+1];
4919 0 : const char *emsg = "ALGLIB: serialization integrity error";
4920 : ae_int_t bytes_appended;
4921 :
4922 : /* prepare serialization, check consistency */
4923 0 : ae_int2str(v, buf, state);
4924 0 : serializer->entries_saved++;
4925 0 : if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
4926 0 : strcat(buf, " ");
4927 : else
4928 0 : strcat(buf, "\r\n");
4929 0 : bytes_appended = (ae_int_t)strlen(buf);
4930 0 : ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
4931 0 : serializer->bytes_written += bytes_appended;
4932 :
4933 : /* append to buffer */
4934 : #ifdef AE_USE_CPP_SERIALIZATION
4935 0 : if( serializer->mode==AE_SM_TO_CPPSTRING )
4936 : {
4937 0 : *(serializer->out_cppstr) += buf;
4938 0 : return;
4939 : }
4940 : #endif
4941 0 : if( serializer->mode==AE_SM_TO_STRING )
4942 : {
4943 0 : strcat(serializer->out_str, buf);
4944 0 : serializer->out_str += bytes_appended;
4945 0 : return;
4946 : }
4947 0 : if( serializer->mode==AE_SM_TO_STREAM )
4948 : {
4949 0 : ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
4950 0 : return;
4951 : }
4952 0 : ae_break(state, ERR_ASSERTION_FAILED, emsg);
4953 : }
4954 :
4955 0 : void ae_serializer_serialize_int64(ae_serializer *serializer, ae_int64_t v, ae_state *state)
4956 : {
4957 : char buf[AE_SER_ENTRY_LENGTH+2+1];
4958 0 : const char *emsg = "ALGLIB: serialization integrity error";
4959 : ae_int_t bytes_appended;
4960 :
4961 : /* prepare serialization, check consistency */
4962 0 : ae_int642str(v, buf, state);
4963 0 : serializer->entries_saved++;
4964 0 : if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
4965 0 : strcat(buf, " ");
4966 : else
4967 0 : strcat(buf, "\r\n");
4968 0 : bytes_appended = (ae_int_t)strlen(buf);
4969 0 : ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
4970 0 : serializer->bytes_written += bytes_appended;
4971 :
4972 : /* append to buffer */
4973 : #ifdef AE_USE_CPP_SERIALIZATION
4974 0 : if( serializer->mode==AE_SM_TO_CPPSTRING )
4975 : {
4976 0 : *(serializer->out_cppstr) += buf;
4977 0 : return;
4978 : }
4979 : #endif
4980 0 : if( serializer->mode==AE_SM_TO_STRING )
4981 : {
4982 0 : strcat(serializer->out_str, buf);
4983 0 : serializer->out_str += bytes_appended;
4984 0 : return;
4985 : }
4986 0 : if( serializer->mode==AE_SM_TO_STREAM )
4987 : {
4988 0 : ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
4989 0 : return;
4990 : }
4991 0 : ae_break(state, ERR_ASSERTION_FAILED, emsg);
4992 : }
4993 :
4994 0 : void ae_serializer_serialize_double(ae_serializer *serializer, double v, ae_state *state)
4995 : {
4996 : char buf[AE_SER_ENTRY_LENGTH+2+1];
4997 0 : const char *emsg = "ALGLIB: serialization integrity error";
4998 : ae_int_t bytes_appended;
4999 :
5000 : /* prepare serialization, check consistency */
5001 0 : ae_double2str(v, buf, state);
5002 0 : serializer->entries_saved++;
5003 0 : if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
5004 0 : strcat(buf, " ");
5005 : else
5006 0 : strcat(buf, "\r\n");
5007 0 : bytes_appended = (ae_int_t)strlen(buf);
5008 0 : ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
5009 0 : serializer->bytes_written += bytes_appended;
5010 :
5011 : /* append to buffer */
5012 : #ifdef AE_USE_CPP_SERIALIZATION
5013 0 : if( serializer->mode==AE_SM_TO_CPPSTRING )
5014 : {
5015 0 : *(serializer->out_cppstr) += buf;
5016 0 : return;
5017 : }
5018 : #endif
5019 0 : if( serializer->mode==AE_SM_TO_STRING )
5020 : {
5021 0 : strcat(serializer->out_str, buf);
5022 0 : serializer->out_str += bytes_appended;
5023 0 : return;
5024 : }
5025 0 : if( serializer->mode==AE_SM_TO_STREAM )
5026 : {
5027 0 : ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
5028 0 : return;
5029 : }
5030 0 : ae_break(state, ERR_ASSERTION_FAILED, emsg);
5031 : }
5032 :
5033 0 : void ae_serializer_serialize_byte_array(ae_serializer *serializer, ae_vector *bytes, ae_state *state)
5034 : {
5035 : ae_int_t chunk_size, entries_count;
5036 :
5037 0 : chunk_size = 8;
5038 :
5039 : /* save array length */
5040 0 : ae_serializer_serialize_int(serializer, bytes->cnt, state);
5041 :
5042 : /* determine entries count */
5043 0 : entries_count = bytes->cnt/chunk_size + (bytes->cnt%chunk_size>0 ? 1 : 0);
5044 0 : for(ae_int_t eidx=0; eidx<entries_count; eidx++)
5045 : {
5046 : ae_int64_t tmpi;
5047 : ae_int_t elen;
5048 0 : elen = bytes->cnt - eidx*chunk_size;
5049 0 : elen = elen>chunk_size ? chunk_size : elen;
5050 : memset(&tmpi, 0, sizeof(tmpi));
5051 0 : memmove(&tmpi, bytes->ptr.p_ubyte + eidx*chunk_size, elen);
5052 0 : ae_serializer_serialize_int64(serializer, tmpi, state);
5053 : }
5054 0 : }
5055 :
5056 0 : void ae_serializer_unserialize_bool(ae_serializer *serializer, ae_bool *v, ae_state *state)
5057 : {
5058 0 : if( serializer->mode==AE_SM_FROM_STRING )
5059 : {
5060 0 : *v = ae_str2bool(serializer->in_str, state, &serializer->in_str);
5061 0 : return;
5062 : }
5063 0 : if( serializer->mode==AE_SM_FROM_STREAM )
5064 : {
5065 : char buf[AE_SER_ENTRY_LENGTH+2+1];
5066 0 : const char *p = buf;
5067 0 : ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
5068 0 : *v = ae_str2bool(buf, state, &p);
5069 0 : return;
5070 : }
5071 0 : ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
5072 : }
5073 :
5074 0 : void ae_serializer_unserialize_int(ae_serializer *serializer, ae_int_t *v, ae_state *state)
5075 : {
5076 0 : if( serializer->mode==AE_SM_FROM_STRING )
5077 : {
5078 0 : *v = ae_str2int(serializer->in_str, state, &serializer->in_str);
5079 0 : return;
5080 : }
5081 0 : if( serializer->mode==AE_SM_FROM_STREAM )
5082 : {
5083 : char buf[AE_SER_ENTRY_LENGTH+2+1];
5084 0 : const char *p = buf;
5085 0 : ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
5086 0 : *v = ae_str2int(buf, state, &p);
5087 0 : return;
5088 : }
5089 0 : ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
5090 : }
5091 :
5092 0 : void ae_serializer_unserialize_int64(ae_serializer *serializer, ae_int64_t *v, ae_state *state)
5093 : {
5094 0 : if( serializer->mode==AE_SM_FROM_STRING )
5095 : {
5096 0 : *v = ae_str2int64(serializer->in_str, state, &serializer->in_str);
5097 0 : return;
5098 : }
5099 0 : if( serializer->mode==AE_SM_FROM_STREAM )
5100 : {
5101 : char buf[AE_SER_ENTRY_LENGTH+2+1];
5102 0 : const char *p = buf;
5103 0 : ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
5104 0 : *v = ae_str2int64(buf, state, &p);
5105 0 : return;
5106 : }
5107 0 : ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
5108 : }
5109 :
5110 0 : void ae_serializer_unserialize_double(ae_serializer *serializer, double *v, ae_state *state)
5111 : {
5112 0 : if( serializer->mode==AE_SM_FROM_STRING )
5113 : {
5114 0 : *v = ae_str2double(serializer->in_str, state, &serializer->in_str);
5115 0 : return;
5116 : }
5117 0 : if( serializer->mode==AE_SM_FROM_STREAM )
5118 : {
5119 : char buf[AE_SER_ENTRY_LENGTH+2+1];
5120 0 : const char *p = buf;
5121 0 : ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
5122 0 : *v = ae_str2double(buf, state, &p);
5123 0 : return;
5124 : }
5125 0 : ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
5126 : }
5127 :
5128 0 : void ae_serializer_unserialize_byte_array(ae_serializer *serializer, ae_vector *bytes, ae_state *state)
5129 : {
5130 : ae_int_t chunk_size, n, entries_count;
5131 :
5132 0 : chunk_size = 8;
5133 :
5134 : /* read array length, allocate output */
5135 0 : ae_serializer_unserialize_int(serializer, &n, state);
5136 0 : ae_vector_set_length(bytes, n, state);
5137 :
5138 : /* determine entries count, read entries */
5139 0 : entries_count = n/chunk_size + (n%chunk_size>0 ? 1 : 0);
5140 0 : for(ae_int_t eidx=0; eidx<entries_count; eidx++)
5141 : {
5142 : ae_int_t elen;
5143 : ae_int64_t tmp64;
5144 :
5145 0 : elen = n-eidx*chunk_size;
5146 0 : elen = elen>chunk_size ? chunk_size : elen;
5147 0 : ae_serializer_unserialize_int64(serializer, &tmp64, state);
5148 0 : memmove(bytes->ptr.p_ubyte+eidx*chunk_size, &tmp64, elen);
5149 : }
5150 0 : }
5151 :
5152 0 : void ae_serializer_stop(ae_serializer *serializer, ae_state *state)
5153 : {
5154 : #ifdef AE_USE_CPP_SERIALIZATION
5155 0 : if( serializer->mode==AE_SM_TO_CPPSTRING )
5156 : {
5157 0 : ae_assert(serializer->bytes_written+1<serializer->bytes_asked, "ae_serializer: integrity check failed", state);/* strict "less" because we need space for trailing zero */
5158 0 : serializer->bytes_written++;
5159 0 : *(serializer->out_cppstr) += ".";
5160 0 : return;
5161 : }
5162 : #endif
5163 0 : if( serializer->mode==AE_SM_TO_STRING )
5164 : {
5165 0 : ae_assert(serializer->bytes_written+1<serializer->bytes_asked, "ae_serializer: integrity check failed", state); /* strict "less" because we need space for trailing zero */
5166 0 : serializer->bytes_written++;
5167 0 : strcat(serializer->out_str, ".");
5168 0 : serializer->out_str += 1;
5169 0 : return;
5170 : }
5171 0 : if( serializer->mode==AE_SM_TO_STREAM )
5172 : {
5173 0 : ae_assert(serializer->bytes_written+1<serializer->bytes_asked, "ae_serializer: integrity check failed", state); /* strict "less" because we need space for trailing zero */
5174 0 : serializer->bytes_written++;
5175 0 : ae_assert(serializer->stream_writer(".", serializer->stream_aux)==0, "ae_serializer: error writing to stream", state);
5176 0 : return;
5177 : }
5178 0 : if( serializer->mode==AE_SM_FROM_STRING )
5179 : {
5180 : /*
5181 : * because input string may be from pre-3.11 serializer,
5182 : * which does not include trailing dot, we do not test
5183 : * string for presence of "." symbol. Anyway, because string
5184 : * is not stream, we do not have to read ALL trailing symbols.
5185 : */
5186 0 : return;
5187 : }
5188 0 : if( serializer->mode==AE_SM_FROM_STREAM )
5189 : {
5190 : /*
5191 : * Read trailing dot, perform integrity check
5192 : */
5193 : char buf[2];
5194 0 : ae_assert(serializer->stream_reader(serializer->stream_aux, 1, buf)==0, "ae_serializer: error reading from stream", state);
5195 0 : ae_assert(buf[0]=='.', "ae_serializer: trailing . is not found in the stream", state);
5196 0 : return;
5197 : }
5198 0 : ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
5199 : }
5200 :
5201 :
5202 : /************************************************************************
5203 : Complex math functions
5204 : ************************************************************************/
5205 0 : ae_complex ae_complex_from_i(ae_int_t v)
5206 : {
5207 : ae_complex r;
5208 0 : r.x = (double)v;
5209 0 : r.y = 0.0;
5210 0 : return r;
5211 : }
5212 :
5213 0 : ae_complex ae_complex_from_d(double v)
5214 : {
5215 : ae_complex r;
5216 0 : r.x = v;
5217 0 : r.y = 0.0;
5218 0 : return r;
5219 : }
5220 :
5221 0 : ae_complex ae_c_neg(ae_complex lhs)
5222 : {
5223 : ae_complex result;
5224 0 : result.x = -lhs.x;
5225 0 : result.y = -lhs.y;
5226 0 : return result;
5227 : }
5228 :
5229 0 : ae_complex ae_c_conj(ae_complex lhs, ae_state *state)
5230 : {
5231 : ae_complex result;
5232 0 : result.x = +lhs.x;
5233 0 : result.y = -lhs.y;
5234 0 : return result;
5235 : }
5236 :
5237 0 : ae_complex ae_c_sqr(ae_complex lhs, ae_state *state)
5238 : {
5239 : ae_complex result;
5240 0 : result.x = lhs.x*lhs.x-lhs.y*lhs.y;
5241 0 : result.y = 2*lhs.x*lhs.y;
5242 0 : return result;
5243 : }
5244 :
5245 0 : double ae_c_abs(ae_complex z, ae_state *state)
5246 : {
5247 : double w;
5248 : double xabs;
5249 : double yabs;
5250 : double v;
5251 :
5252 0 : xabs = fabs(z.x);
5253 0 : yabs = fabs(z.y);
5254 0 : w = xabs>yabs ? xabs : yabs;
5255 0 : v = xabs<yabs ? xabs : yabs;
5256 0 : if( v==0 )
5257 0 : return w;
5258 : else
5259 : {
5260 0 : double t = v/w;
5261 0 : return w*sqrt(1+t*t);
5262 : }
5263 : }
5264 :
5265 0 : ae_bool ae_c_eq(ae_complex lhs, ae_complex rhs)
5266 : {
5267 0 : volatile double x1 = lhs.x;
5268 0 : volatile double x2 = rhs.x;
5269 0 : volatile double y1 = lhs.y;
5270 0 : volatile double y2 = rhs.y;
5271 0 : return x1==x2 && y1==y2;
5272 : }
5273 :
5274 0 : ae_bool ae_c_neq(ae_complex lhs, ae_complex rhs)
5275 : {
5276 0 : volatile double x1 = lhs.x;
5277 0 : volatile double x2 = rhs.x;
5278 0 : volatile double y1 = lhs.y;
5279 0 : volatile double y2 = rhs.y;
5280 0 : return x1!=x2 || y1!=y2;
5281 : }
5282 :
5283 0 : ae_complex ae_c_add(ae_complex lhs, ae_complex rhs)
5284 : {
5285 : ae_complex result;
5286 0 : result.x = lhs.x+rhs.x;
5287 0 : result.y = lhs.y+rhs.y;
5288 0 : return result;
5289 : }
5290 :
5291 0 : ae_complex ae_c_mul(ae_complex lhs, ae_complex rhs)
5292 : {
5293 : ae_complex result;
5294 0 : result.x = lhs.x*rhs.x-lhs.y*rhs.y;
5295 0 : result.y = lhs.x*rhs.y+lhs.y*rhs.x;
5296 0 : return result;
5297 : }
5298 :
5299 0 : ae_complex ae_c_sub(ae_complex lhs, ae_complex rhs)
5300 : {
5301 : ae_complex result;
5302 0 : result.x = lhs.x-rhs.x;
5303 0 : result.y = lhs.y-rhs.y;
5304 0 : return result;
5305 : }
5306 :
5307 0 : ae_complex ae_c_div(ae_complex lhs, ae_complex rhs)
5308 : {
5309 : ae_complex result;
5310 : double e;
5311 : double f;
5312 0 : if( fabs(rhs.y)<fabs(rhs.x) )
5313 : {
5314 0 : e = rhs.y/rhs.x;
5315 0 : f = rhs.x+rhs.y*e;
5316 0 : result.x = (lhs.x+lhs.y*e)/f;
5317 0 : result.y = (lhs.y-lhs.x*e)/f;
5318 : }
5319 : else
5320 : {
5321 0 : e = rhs.x/rhs.y;
5322 0 : f = rhs.y+rhs.x*e;
5323 0 : result.x = (lhs.y+lhs.x*e)/f;
5324 0 : result.y = (-lhs.x+lhs.y*e)/f;
5325 : }
5326 0 : return result;
5327 : }
5328 :
5329 0 : ae_bool ae_c_eq_d(ae_complex lhs, double rhs)
5330 : {
5331 0 : volatile double x1 = lhs.x;
5332 0 : volatile double x2 = rhs;
5333 0 : volatile double y1 = lhs.y;
5334 0 : volatile double y2 = 0;
5335 0 : return x1==x2 && y1==y2;
5336 : }
5337 :
5338 0 : ae_bool ae_c_neq_d(ae_complex lhs, double rhs)
5339 : {
5340 0 : volatile double x1 = lhs.x;
5341 0 : volatile double x2 = rhs;
5342 0 : volatile double y1 = lhs.y;
5343 0 : volatile double y2 = 0;
5344 0 : return x1!=x2 || y1!=y2;
5345 : }
5346 :
5347 0 : ae_complex ae_c_add_d(ae_complex lhs, double rhs)
5348 : {
5349 : ae_complex result;
5350 0 : result.x = lhs.x+rhs;
5351 0 : result.y = lhs.y;
5352 0 : return result;
5353 : }
5354 :
5355 0 : ae_complex ae_c_mul_d(ae_complex lhs, double rhs)
5356 : {
5357 : ae_complex result;
5358 0 : result.x = lhs.x*rhs;
5359 0 : result.y = lhs.y*rhs;
5360 0 : return result;
5361 : }
5362 :
5363 0 : ae_complex ae_c_sub_d(ae_complex lhs, double rhs)
5364 : {
5365 : ae_complex result;
5366 0 : result.x = lhs.x-rhs;
5367 0 : result.y = lhs.y;
5368 0 : return result;
5369 : }
5370 :
5371 0 : ae_complex ae_c_d_sub(double lhs, ae_complex rhs)
5372 : {
5373 : ae_complex result;
5374 0 : result.x = lhs-rhs.x;
5375 0 : result.y = -rhs.y;
5376 0 : return result;
5377 : }
5378 :
5379 0 : ae_complex ae_c_div_d(ae_complex lhs, double rhs)
5380 : {
5381 : ae_complex result;
5382 0 : result.x = lhs.x/rhs;
5383 0 : result.y = lhs.y/rhs;
5384 0 : return result;
5385 : }
5386 :
5387 0 : ae_complex ae_c_d_div(double lhs, ae_complex rhs)
5388 : {
5389 : ae_complex result;
5390 : double e;
5391 : double f;
5392 0 : if( fabs(rhs.y)<fabs(rhs.x) )
5393 : {
5394 0 : e = rhs.y/rhs.x;
5395 0 : f = rhs.x+rhs.y*e;
5396 0 : result.x = lhs/f;
5397 0 : result.y = -lhs*e/f;
5398 : }
5399 : else
5400 : {
5401 0 : e = rhs.x/rhs.y;
5402 0 : f = rhs.y+rhs.x*e;
5403 0 : result.x = lhs*e/f;
5404 0 : result.y = -lhs/f;
5405 : }
5406 0 : return result;
5407 : }
5408 :
5409 :
5410 : /************************************************************************
5411 : Complex BLAS operations
5412 : ************************************************************************/
5413 0 : ae_complex ae_v_cdotproduct(const ae_complex *v0, ae_int_t stride0, const char *conj0, const ae_complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n)
5414 : {
5415 0 : double rx = 0, ry = 0;
5416 : ae_int_t i;
5417 0 : ae_bool bconj0 = !((conj0[0]=='N') || (conj0[0]=='n'));
5418 0 : ae_bool bconj1 = !((conj1[0]=='N') || (conj1[0]=='n'));
5419 : ae_complex result;
5420 0 : if( bconj0 && bconj1 )
5421 : {
5422 : double v0x, v0y, v1x, v1y;
5423 0 : for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
5424 : {
5425 0 : v0x = v0->x;
5426 0 : v0y = -v0->y;
5427 0 : v1x = v1->x;
5428 0 : v1y = -v1->y;
5429 0 : rx += v0x*v1x-v0y*v1y;
5430 0 : ry += v0x*v1y+v0y*v1x;
5431 : }
5432 : }
5433 0 : if( !bconj0 && bconj1 )
5434 : {
5435 : double v0x, v0y, v1x, v1y;
5436 0 : for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
5437 : {
5438 0 : v0x = v0->x;
5439 0 : v0y = v0->y;
5440 0 : v1x = v1->x;
5441 0 : v1y = -v1->y;
5442 0 : rx += v0x*v1x-v0y*v1y;
5443 0 : ry += v0x*v1y+v0y*v1x;
5444 : }
5445 : }
5446 0 : if( bconj0 && !bconj1 )
5447 : {
5448 : double v0x, v0y, v1x, v1y;
5449 0 : for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
5450 : {
5451 0 : v0x = v0->x;
5452 0 : v0y = -v0->y;
5453 0 : v1x = v1->x;
5454 0 : v1y = v1->y;
5455 0 : rx += v0x*v1x-v0y*v1y;
5456 0 : ry += v0x*v1y+v0y*v1x;
5457 : }
5458 : }
5459 0 : if( !bconj0 && !bconj1 )
5460 : {
5461 : double v0x, v0y, v1x, v1y;
5462 0 : for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
5463 : {
5464 0 : v0x = v0->x;
5465 0 : v0y = v0->y;
5466 0 : v1x = v1->x;
5467 0 : v1y = v1->y;
5468 0 : rx += v0x*v1x-v0y*v1y;
5469 0 : ry += v0x*v1y+v0y*v1x;
5470 : }
5471 : }
5472 0 : result.x = rx;
5473 0 : result.y = ry;
5474 0 : return result;
5475 : }
5476 :
5477 0 : void ae_v_cmove(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
5478 : {
5479 0 : ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5480 : ae_int_t i;
5481 0 : if( stride_dst!=1 || stride_src!=1 )
5482 : {
5483 : /*
5484 : * general unoptimized case
5485 : */
5486 0 : if( bconj )
5487 : {
5488 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5489 : {
5490 0 : vdst->x = vsrc->x;
5491 0 : vdst->y = -vsrc->y;
5492 : }
5493 : }
5494 : else
5495 : {
5496 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5497 0 : *vdst = *vsrc;
5498 : }
5499 : }
5500 : else
5501 : {
5502 : /*
5503 : * optimized case
5504 : */
5505 0 : if( bconj )
5506 : {
5507 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5508 : {
5509 0 : vdst->x = vsrc->x;
5510 0 : vdst->y = -vsrc->y;
5511 : }
5512 : }
5513 : else
5514 : {
5515 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5516 0 : *vdst = *vsrc;
5517 : }
5518 : }
5519 0 : }
5520 :
5521 0 : void ae_v_cmoveneg(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
5522 : {
5523 0 : ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5524 : ae_int_t i;
5525 0 : if( stride_dst!=1 || stride_src!=1 )
5526 : {
5527 : /*
5528 : * general unoptimized case
5529 : */
5530 0 : if( bconj )
5531 : {
5532 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5533 : {
5534 0 : vdst->x = -vsrc->x;
5535 0 : vdst->y = vsrc->y;
5536 : }
5537 : }
5538 : else
5539 : {
5540 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5541 : {
5542 0 : vdst->x = -vsrc->x;
5543 0 : vdst->y = -vsrc->y;
5544 : }
5545 : }
5546 : }
5547 : else
5548 : {
5549 : /*
5550 : * optimized case
5551 : */
5552 0 : if( bconj )
5553 : {
5554 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5555 : {
5556 0 : vdst->x = -vsrc->x;
5557 0 : vdst->y = vsrc->y;
5558 : }
5559 : }
5560 : else
5561 : {
5562 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5563 : {
5564 0 : vdst->x = -vsrc->x;
5565 0 : vdst->y = -vsrc->y;
5566 : }
5567 : }
5568 : }
5569 0 : }
5570 :
5571 0 : void ae_v_cmoved(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
5572 : {
5573 0 : ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5574 : ae_int_t i;
5575 0 : if( stride_dst!=1 || stride_src!=1 )
5576 : {
5577 : /*
5578 : * general unoptimized case
5579 : */
5580 0 : if( bconj )
5581 : {
5582 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5583 : {
5584 0 : vdst->x = alpha*vsrc->x;
5585 0 : vdst->y = -alpha*vsrc->y;
5586 : }
5587 : }
5588 : else
5589 : {
5590 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5591 : {
5592 0 : vdst->x = alpha*vsrc->x;
5593 0 : vdst->y = alpha*vsrc->y;
5594 : }
5595 : }
5596 : }
5597 : else
5598 : {
5599 : /*
5600 : * optimized case
5601 : */
5602 0 : if( bconj )
5603 : {
5604 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5605 : {
5606 0 : vdst->x = alpha*vsrc->x;
5607 0 : vdst->y = -alpha*vsrc->y;
5608 : }
5609 : }
5610 : else
5611 : {
5612 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5613 : {
5614 0 : vdst->x = alpha*vsrc->x;
5615 0 : vdst->y = alpha*vsrc->y;
5616 : }
5617 : }
5618 : }
5619 0 : }
5620 :
5621 0 : void ae_v_cmovec(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
5622 : {
5623 0 : ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5624 : ae_int_t i;
5625 0 : if( stride_dst!=1 || stride_src!=1 )
5626 : {
5627 : /*
5628 : * general unoptimized case
5629 : */
5630 0 : if( bconj )
5631 : {
5632 0 : double ax = alpha.x, ay = alpha.y;
5633 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5634 : {
5635 0 : vdst->x = ax*vsrc->x+ay*vsrc->y;
5636 0 : vdst->y = -ax*vsrc->y+ay*vsrc->x;
5637 : }
5638 : }
5639 : else
5640 : {
5641 0 : double ax = alpha.x, ay = alpha.y;
5642 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5643 : {
5644 0 : vdst->x = ax*vsrc->x-ay*vsrc->y;
5645 0 : vdst->y = ax*vsrc->y+ay*vsrc->x;
5646 : }
5647 : }
5648 0 : }
5649 : else
5650 : {
5651 : /*
5652 : * highly optimized case
5653 : */
5654 0 : if( bconj )
5655 : {
5656 0 : double ax = alpha.x, ay = alpha.y;
5657 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5658 : {
5659 0 : vdst->x = ax*vsrc->x+ay*vsrc->y;
5660 0 : vdst->y = -ax*vsrc->y+ay*vsrc->x;
5661 : }
5662 : }
5663 : else
5664 : {
5665 0 : double ax = alpha.x, ay = alpha.y;
5666 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5667 : {
5668 0 : vdst->x = ax*vsrc->x-ay*vsrc->y;
5669 0 : vdst->y = ax*vsrc->y+ay*vsrc->x;
5670 : }
5671 : }
5672 : }
5673 0 : }
5674 :
5675 0 : void ae_v_cadd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
5676 : {
5677 0 : ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5678 : ae_int_t i;
5679 0 : if( stride_dst!=1 || stride_src!=1 )
5680 : {
5681 : /*
5682 : * general unoptimized case
5683 : */
5684 0 : if( bconj )
5685 : {
5686 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5687 : {
5688 0 : vdst->x += vsrc->x;
5689 0 : vdst->y -= vsrc->y;
5690 : }
5691 : }
5692 : else
5693 : {
5694 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5695 : {
5696 0 : vdst->x += vsrc->x;
5697 0 : vdst->y += vsrc->y;
5698 : }
5699 : }
5700 : }
5701 : else
5702 : {
5703 : /*
5704 : * optimized case
5705 : */
5706 0 : if( bconj )
5707 : {
5708 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5709 : {
5710 0 : vdst->x += vsrc->x;
5711 0 : vdst->y -= vsrc->y;
5712 : }
5713 : }
5714 : else
5715 : {
5716 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5717 : {
5718 0 : vdst->x += vsrc->x;
5719 0 : vdst->y += vsrc->y;
5720 : }
5721 : }
5722 : }
5723 0 : }
5724 :
5725 0 : void ae_v_caddd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
5726 : {
5727 0 : ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5728 : ae_int_t i;
5729 0 : if( stride_dst!=1 || stride_src!=1 )
5730 : {
5731 : /*
5732 : * general unoptimized case
5733 : */
5734 0 : if( bconj )
5735 : {
5736 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5737 : {
5738 0 : vdst->x += alpha*vsrc->x;
5739 0 : vdst->y -= alpha*vsrc->y;
5740 : }
5741 : }
5742 : else
5743 : {
5744 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5745 : {
5746 0 : vdst->x += alpha*vsrc->x;
5747 0 : vdst->y += alpha*vsrc->y;
5748 : }
5749 : }
5750 : }
5751 : else
5752 : {
5753 : /*
5754 : * optimized case
5755 : */
5756 0 : if( bconj )
5757 : {
5758 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5759 : {
5760 0 : vdst->x += alpha*vsrc->x;
5761 0 : vdst->y -= alpha*vsrc->y;
5762 : }
5763 : }
5764 : else
5765 : {
5766 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5767 : {
5768 0 : vdst->x += alpha*vsrc->x;
5769 0 : vdst->y += alpha*vsrc->y;
5770 : }
5771 : }
5772 : }
5773 0 : }
5774 :
5775 0 : void ae_v_caddc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
5776 : {
5777 0 : ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5778 : ae_int_t i;
5779 0 : if( stride_dst!=1 || stride_src!=1 )
5780 : {
5781 : /*
5782 : * general unoptimized case
5783 : */
5784 0 : double ax = alpha.x, ay = alpha.y;
5785 0 : if( bconj )
5786 : {
5787 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5788 : {
5789 0 : vdst->x += ax*vsrc->x+ay*vsrc->y;
5790 0 : vdst->y -= ax*vsrc->y-ay*vsrc->x;
5791 : }
5792 : }
5793 : else
5794 : {
5795 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5796 : {
5797 0 : vdst->x += ax*vsrc->x-ay*vsrc->y;
5798 0 : vdst->y += ax*vsrc->y+ay*vsrc->x;
5799 : }
5800 : }
5801 0 : }
5802 : else
5803 : {
5804 : /*
5805 : * highly optimized case
5806 : */
5807 0 : double ax = alpha.x, ay = alpha.y;
5808 0 : if( bconj )
5809 : {
5810 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5811 : {
5812 0 : vdst->x += ax*vsrc->x+ay*vsrc->y;
5813 0 : vdst->y -= ax*vsrc->y-ay*vsrc->x;
5814 : }
5815 : }
5816 : else
5817 : {
5818 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5819 : {
5820 0 : vdst->x += ax*vsrc->x-ay*vsrc->y;
5821 0 : vdst->y += ax*vsrc->y+ay*vsrc->x;
5822 : }
5823 : }
5824 : }
5825 0 : }
5826 :
5827 0 : void ae_v_csub(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
5828 : {
5829 0 : ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5830 : ae_int_t i;
5831 0 : if( stride_dst!=1 || stride_src!=1 )
5832 : {
5833 : /*
5834 : * general unoptimized case
5835 : */
5836 0 : if( bconj )
5837 : {
5838 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5839 : {
5840 0 : vdst->x -= vsrc->x;
5841 0 : vdst->y += vsrc->y;
5842 : }
5843 : }
5844 : else
5845 : {
5846 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5847 : {
5848 0 : vdst->x -= vsrc->x;
5849 0 : vdst->y -= vsrc->y;
5850 : }
5851 : }
5852 : }
5853 : else
5854 : {
5855 : /*
5856 : * highly optimized case
5857 : */
5858 0 : if( bconj )
5859 : {
5860 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5861 : {
5862 0 : vdst->x -= vsrc->x;
5863 0 : vdst->y += vsrc->y;
5864 : }
5865 : }
5866 : else
5867 : {
5868 0 : for(i=0; i<n; i++, vdst++, vsrc++)
5869 : {
5870 0 : vdst->x -= vsrc->x;
5871 0 : vdst->y -= vsrc->y;
5872 : }
5873 : }
5874 : }
5875 0 : }
5876 :
5877 0 : void ae_v_csubd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
5878 : {
5879 0 : ae_v_caddd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
5880 0 : }
5881 :
5882 0 : void ae_v_csubc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
5883 : {
5884 0 : alpha.x = -alpha.x;
5885 0 : alpha.y = -alpha.y;
5886 0 : ae_v_caddc(vdst, stride_dst, vsrc, stride_src, conj_src, n, alpha);
5887 0 : }
5888 :
5889 0 : void ae_v_cmuld(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
5890 : {
5891 : ae_int_t i;
5892 0 : if( stride_dst!=1 )
5893 : {
5894 : /*
5895 : * general unoptimized case
5896 : */
5897 0 : for(i=0; i<n; i++, vdst+=stride_dst)
5898 : {
5899 0 : vdst->x *= alpha;
5900 0 : vdst->y *= alpha;
5901 : }
5902 : }
5903 : else
5904 : {
5905 : /*
5906 : * optimized case
5907 : */
5908 0 : for(i=0; i<n; i++, vdst++)
5909 : {
5910 0 : vdst->x *= alpha;
5911 0 : vdst->y *= alpha;
5912 : }
5913 : }
5914 0 : }
5915 :
5916 0 : void ae_v_cmulc(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, ae_complex alpha)
5917 : {
5918 : ae_int_t i;
5919 0 : if( stride_dst!=1 )
5920 : {
5921 : /*
5922 : * general unoptimized case
5923 : */
5924 0 : double ax = alpha.x, ay = alpha.y;
5925 0 : for(i=0; i<n; i++, vdst+=stride_dst)
5926 : {
5927 0 : double dstx = vdst->x, dsty = vdst->y;
5928 0 : vdst->x = ax*dstx-ay*dsty;
5929 0 : vdst->y = ax*dsty+ay*dstx;
5930 : }
5931 : }
5932 : else
5933 : {
5934 : /*
5935 : * highly optimized case
5936 : */
5937 0 : double ax = alpha.x, ay = alpha.y;
5938 0 : for(i=0; i<n; i++, vdst++)
5939 : {
5940 0 : double dstx = vdst->x, dsty = vdst->y;
5941 0 : vdst->x = ax*dstx-ay*dsty;
5942 0 : vdst->y = ax*dsty+ay*dstx;
5943 : }
5944 : }
5945 0 : }
5946 :
5947 : /************************************************************************
5948 : Real BLAS operations
5949 : ************************************************************************/
5950 0 : double ae_v_dotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n)
5951 : {
5952 0 : double result = 0;
5953 : ae_int_t i;
5954 0 : if( stride0!=1 || stride1!=1 )
5955 : {
5956 : /*
5957 : * slow general code
5958 : */
5959 0 : for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
5960 0 : result += (*v0)*(*v1);
5961 : }
5962 : else
5963 : {
5964 : /*
5965 : * optimized code for stride=1
5966 : */
5967 0 : ae_int_t n4 = n/4;
5968 0 : ae_int_t nleft = n%4;
5969 0 : for(i=0; i<n4; i++, v0+=4, v1+=4)
5970 0 : result += v0[0]*v1[0]+v0[1]*v1[1]+v0[2]*v1[2]+v0[3]*v1[3];
5971 0 : for(i=0; i<nleft; i++, v0++, v1++)
5972 0 : result += v0[0]*v1[0];
5973 : }
5974 0 : return result;
5975 : }
5976 :
5977 0 : void ae_v_move(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n)
5978 : {
5979 : ae_int_t i;
5980 0 : if( stride_dst!=1 || stride_src!=1 )
5981 : {
5982 : /*
5983 : * general unoptimized case
5984 : */
5985 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5986 0 : *vdst = *vsrc;
5987 : }
5988 : else
5989 : {
5990 : /*
5991 : * optimized case
5992 : */
5993 0 : ae_int_t n2 = n/2;
5994 0 : for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
5995 : {
5996 0 : vdst[0] = vsrc[0];
5997 0 : vdst[1] = vsrc[1];
5998 : }
5999 0 : if( n%2!=0 )
6000 0 : vdst[0] = vsrc[0];
6001 : }
6002 0 : }
6003 :
6004 0 : void ae_v_moveneg(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n)
6005 : {
6006 : ae_int_t i;
6007 0 : if( stride_dst!=1 || stride_src!=1 )
6008 : {
6009 : /*
6010 : * general unoptimized case
6011 : */
6012 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6013 0 : *vdst = -*vsrc;
6014 : }
6015 : else
6016 : {
6017 : /*
6018 : * optimized case
6019 : */
6020 0 : ae_int_t n2 = n/2;
6021 0 : for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6022 : {
6023 0 : vdst[0] = -vsrc[0];
6024 0 : vdst[1] = -vsrc[1];
6025 : }
6026 0 : if( n%2!=0 )
6027 0 : vdst[0] = -vsrc[0];
6028 : }
6029 0 : }
6030 :
6031 0 : void ae_v_moved(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
6032 : {
6033 : ae_int_t i;
6034 0 : if( stride_dst!=1 || stride_src!=1 )
6035 : {
6036 : /*
6037 : * general unoptimized case
6038 : */
6039 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6040 0 : *vdst = alpha*(*vsrc);
6041 : }
6042 : else
6043 : {
6044 : /*
6045 : * optimized case
6046 : */
6047 0 : ae_int_t n2 = n/2;
6048 0 : for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6049 : {
6050 0 : vdst[0] = alpha*vsrc[0];
6051 0 : vdst[1] = alpha*vsrc[1];
6052 : }
6053 0 : if( n%2!=0 )
6054 0 : vdst[0] = alpha*vsrc[0];
6055 : }
6056 0 : }
6057 :
6058 0 : void ae_v_add(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
6059 : {
6060 : ae_int_t i;
6061 0 : if( stride_dst!=1 || stride_src!=1 )
6062 : {
6063 : /*
6064 : * general unoptimized case
6065 : */
6066 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6067 0 : *vdst += *vsrc;
6068 : }
6069 : else
6070 : {
6071 : /*
6072 : * optimized case
6073 : */
6074 0 : ae_int_t n2 = n/2;
6075 0 : for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6076 : {
6077 0 : vdst[0] += vsrc[0];
6078 0 : vdst[1] += vsrc[1];
6079 : }
6080 0 : if( n%2!=0 )
6081 0 : vdst[0] += vsrc[0];
6082 : }
6083 0 : }
6084 :
6085 0 : void ae_v_addd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
6086 : {
6087 : ae_int_t i;
6088 0 : if( stride_dst!=1 || stride_src!=1 )
6089 : {
6090 : /*
6091 : * general unoptimized case
6092 : */
6093 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6094 0 : *vdst += alpha*(*vsrc);
6095 : }
6096 : else
6097 : {
6098 : /*
6099 : * optimized case
6100 : */
6101 0 : ae_int_t n2 = n/2;
6102 0 : for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6103 : {
6104 0 : vdst[0] += alpha*vsrc[0];
6105 0 : vdst[1] += alpha*vsrc[1];
6106 : }
6107 0 : if( n%2!=0 )
6108 0 : vdst[0] += alpha*vsrc[0];
6109 : }
6110 0 : }
6111 :
6112 0 : void ae_v_sub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
6113 : {
6114 : ae_int_t i;
6115 0 : if( stride_dst!=1 || stride_src!=1 )
6116 : {
6117 : /*
6118 : * general unoptimized case
6119 : */
6120 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6121 0 : *vdst -= *vsrc;
6122 : }
6123 : else
6124 : {
6125 : /*
6126 : * highly optimized case
6127 : */
6128 0 : ae_int_t n2 = n/2;
6129 0 : for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6130 : {
6131 0 : vdst[0] -= vsrc[0];
6132 0 : vdst[1] -= vsrc[1];
6133 : }
6134 0 : if( n%2!=0 )
6135 0 : vdst[0] -= vsrc[0];
6136 : }
6137 0 : }
6138 :
6139 0 : void ae_v_subd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
6140 : {
6141 0 : ae_v_addd(vdst, stride_dst, vsrc, stride_src, n, -alpha);
6142 0 : }
6143 :
6144 0 : void ae_v_muld(double *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
6145 : {
6146 : ae_int_t i;
6147 0 : if( stride_dst!=1 )
6148 : {
6149 : /*
6150 : * general unoptimized case
6151 : */
6152 0 : for(i=0; i<n; i++, vdst+=stride_dst)
6153 0 : *vdst *= alpha;
6154 : }
6155 : else
6156 : {
6157 : /*
6158 : * highly optimized case
6159 : */
6160 0 : for(i=0; i<n; i++, vdst++)
6161 0 : *vdst *= alpha;
6162 : }
6163 0 : }
6164 :
6165 : /************************************************************************
6166 : Other functions
6167 : ************************************************************************/
6168 0 : ae_int_t ae_v_len(ae_int_t a, ae_int_t b)
6169 : {
6170 0 : return b-a+1;
6171 : }
6172 :
6173 : /************************************************************************
6174 : RComm functions
6175 : ************************************************************************/
6176 0 : void _rcommstate_init(rcommstate* p, ae_state *_state, ae_bool make_automatic)
6177 : {
6178 : /* initial zero-filling */
6179 0 : memset(&p->ba, 0, sizeof(p->ba));
6180 0 : memset(&p->ia, 0, sizeof(p->ia));
6181 0 : memset(&p->ra, 0, sizeof(p->ra));
6182 0 : memset(&p->ca, 0, sizeof(p->ca));
6183 :
6184 : /* initialization */
6185 0 : ae_vector_init(&p->ba, 0, DT_BOOL, _state, make_automatic);
6186 0 : ae_vector_init(&p->ia, 0, DT_INT, _state, make_automatic);
6187 0 : ae_vector_init(&p->ra, 0, DT_REAL, _state, make_automatic);
6188 0 : ae_vector_init(&p->ca, 0, DT_COMPLEX, _state, make_automatic);
6189 0 : }
6190 :
6191 0 : void _rcommstate_init_copy(rcommstate* dst, rcommstate* src, ae_state *_state, ae_bool make_automatic)
6192 : {
6193 : /* initial zero-filling */
6194 0 : memset(&dst->ba, 0, sizeof(dst->ba));
6195 0 : memset(&dst->ia, 0, sizeof(dst->ia));
6196 0 : memset(&dst->ra, 0, sizeof(dst->ra));
6197 0 : memset(&dst->ca, 0, sizeof(dst->ca));
6198 :
6199 : /* initialization */
6200 0 : ae_vector_init_copy(&dst->ba, &src->ba, _state, make_automatic);
6201 0 : ae_vector_init_copy(&dst->ia, &src->ia, _state, make_automatic);
6202 0 : ae_vector_init_copy(&dst->ra, &src->ra, _state, make_automatic);
6203 0 : ae_vector_init_copy(&dst->ca, &src->ca, _state, make_automatic);
6204 0 : dst->stage = src->stage;
6205 0 : }
6206 :
6207 0 : void _rcommstate_clear(rcommstate* p)
6208 : {
6209 0 : ae_vector_clear(&p->ba);
6210 0 : ae_vector_clear(&p->ia);
6211 0 : ae_vector_clear(&p->ra);
6212 0 : ae_vector_clear(&p->ca);
6213 0 : }
6214 :
6215 0 : void _rcommstate_destroy(rcommstate* p)
6216 : {
6217 0 : _rcommstate_clear(p);
6218 0 : }
6219 :
6220 :
6221 : }
6222 :
6223 : /////////////////////////////////////////////////////////////////////////
6224 : //
6225 : // THIS SECTION CONTAINS C++ RELATED FUNCTIONALITY
6226 : //
6227 : /////////////////////////////////////////////////////////////////////////
6228 : /********************************************************************
6229 : Internal forwards
6230 : ********************************************************************/
6231 : namespace alglib
6232 : {
6233 : double get_aenv_nan();
6234 : double get_aenv_posinf();
6235 : double get_aenv_neginf();
6236 : ae_int_t my_stricmp(const char *s1, const char *s2);
6237 : char* filter_spaces(const char *s);
6238 : void str_vector_create(const char *src, bool match_head_only, std::vector<const char*> *p_vec);
6239 : void str_matrix_create(const char *src, std::vector< std::vector<const char*> > *p_mat);
6240 :
6241 : ae_bool parse_bool_delim(const char *s, const char *delim);
6242 : ae_int_t parse_int_delim(const char *s, const char *delim);
6243 : bool _parse_real_delim(const char *s, const char *delim, double *result, const char **new_s);
6244 : double parse_real_delim(const char *s, const char *delim);
6245 : alglib::complex parse_complex_delim(const char *s, const char *delim);
6246 :
6247 : std::string arraytostring(const bool *ptr, ae_int_t n);
6248 : std::string arraytostring(const ae_int_t *ptr, ae_int_t n);
6249 : std::string arraytostring(const double *ptr, ae_int_t n, int dps);
6250 : std::string arraytostring(const alglib::complex *ptr, ae_int_t n, int dps);
6251 : }
6252 :
6253 : /********************************************************************
6254 : Global and local constants/variables
6255 : ********************************************************************/
6256 : const double alglib::machineepsilon = 5E-16;
6257 : const double alglib::maxrealnumber = 1E300;
6258 : const double alglib::minrealnumber = 1E-300;
6259 : const alglib::ae_int_t alglib::endianness = alglib_impl::ae_get_endianness();
6260 : const double alglib::fp_nan = alglib::get_aenv_nan();
6261 : const double alglib::fp_posinf = alglib::get_aenv_posinf();
6262 : const double alglib::fp_neginf = alglib::get_aenv_neginf();
6263 : #if defined(AE_NO_EXCEPTIONS)
6264 : static const char *_alglib_last_error = NULL;
6265 : #endif
6266 : static const alglib_impl::ae_uint64_t _i64_xdefault = 0x0;
6267 : static const alglib_impl::ae_uint64_t _i64_xserial = _ALGLIB_FLG_THREADING_SERIAL;
6268 : static const alglib_impl::ae_uint64_t _i64_xparallel = _ALGLIB_FLG_THREADING_PARALLEL;
6269 : const alglib::xparams &alglib::xdefault = *((const alglib::xparams *)(&_i64_xdefault));
6270 : const alglib::xparams &alglib::serial = *((const alglib::xparams *)(&_i64_xserial));
6271 : const alglib::xparams &alglib::parallel = *((const alglib::xparams *)(&_i64_xparallel));
6272 :
6273 :
6274 :
6275 : /********************************************************************
6276 : Exception handling
6277 : ********************************************************************/
6278 : #if !defined(AE_NO_EXCEPTIONS)
6279 0 : alglib::ap_error::ap_error()
6280 : {
6281 0 : }
6282 :
6283 0 : alglib::ap_error::ap_error(const char *s)
6284 : {
6285 0 : msg = s;
6286 0 : }
6287 :
6288 0 : void alglib::ap_error::make_assertion(bool bClause)
6289 : {
6290 0 : if(!bClause)
6291 0 : _ALGLIB_CPP_EXCEPTION("");
6292 0 : }
6293 :
6294 0 : void alglib::ap_error::make_assertion(bool bClause, const char *p_msg)
6295 : {
6296 0 : if(!bClause)
6297 0 : _ALGLIB_CPP_EXCEPTION(p_msg);
6298 0 : }
6299 : #else
6300 : void alglib::set_error_flag(const char *s)
6301 : {
6302 : if( s==NULL )
6303 : s = "ALGLIB: unknown error";
6304 : _alglib_last_error = s;
6305 : }
6306 :
6307 : bool alglib::get_error_flag(const char **p_msg)
6308 : {
6309 : if( _alglib_last_error==NULL )
6310 : return false;
6311 : if( p_msg!=NULL )
6312 : *p_msg = _alglib_last_error;
6313 : return true;
6314 : }
6315 :
6316 : void alglib::clear_error_flag()
6317 : {
6318 : _alglib_last_error = NULL;
6319 : }
6320 : #endif
6321 :
6322 : /********************************************************************
6323 : Complex number with double precision.
6324 : ********************************************************************/
6325 0 : alglib::complex::complex():x(0.0),y(0.0)
6326 : {
6327 0 : }
6328 :
6329 0 : alglib::complex::complex(const double &_x):x(_x),y(0.0)
6330 : {
6331 0 : }
6332 :
6333 0 : alglib::complex::complex(const double &_x, const double &_y):x(_x),y(_y)
6334 : {
6335 0 : }
6336 :
6337 0 : alglib::complex::complex(const alglib::complex &z):x(z.x),y(z.y)
6338 : {
6339 0 : }
6340 :
6341 0 : alglib::complex& alglib::complex::operator= (const double& v)
6342 : {
6343 0 : x = v;
6344 0 : y = 0.0;
6345 0 : return *this;
6346 : }
6347 :
6348 0 : alglib::complex& alglib::complex::operator+=(const double& v)
6349 : {
6350 0 : x += v;
6351 0 : return *this;
6352 : }
6353 :
6354 0 : alglib::complex& alglib::complex::operator-=(const double& v)
6355 : {
6356 0 : x -= v;
6357 0 : return *this;
6358 : }
6359 :
6360 0 : alglib::complex& alglib::complex::operator*=(const double& v)
6361 : {
6362 0 : x *= v;
6363 0 : y *= v;
6364 0 : return *this;
6365 : }
6366 :
6367 0 : alglib::complex& alglib::complex::operator/=(const double& v)
6368 : {
6369 0 : x /= v;
6370 0 : y /= v;
6371 0 : return *this;
6372 : }
6373 :
6374 0 : alglib::complex& alglib::complex::operator= (const alglib::complex& z)
6375 : {
6376 0 : x = z.x;
6377 0 : y = z.y;
6378 0 : return *this;
6379 : }
6380 :
6381 0 : alglib::complex& alglib::complex::operator+=(const alglib::complex& z)
6382 : {
6383 0 : x += z.x;
6384 0 : y += z.y;
6385 0 : return *this;
6386 : }
6387 :
6388 0 : alglib::complex& alglib::complex::operator-=(const alglib::complex& z)
6389 : {
6390 0 : x -= z.x;
6391 0 : y -= z.y;
6392 0 : return *this;
6393 : }
6394 :
6395 0 : alglib::complex& alglib::complex::operator*=(const alglib::complex& z)
6396 : {
6397 0 : double t = x*z.x-y*z.y;
6398 0 : y = x*z.y+y*z.x;
6399 0 : x = t;
6400 0 : return *this;
6401 : }
6402 :
6403 0 : alglib::complex& alglib::complex::operator/=(const alglib::complex& z)
6404 : {
6405 0 : alglib::complex result;
6406 : double e;
6407 : double f;
6408 0 : if( fabs(z.y)<fabs(z.x) )
6409 : {
6410 0 : e = z.y/z.x;
6411 0 : f = z.x+z.y*e;
6412 0 : result.x = (x+y*e)/f;
6413 0 : result.y = (y-x*e)/f;
6414 : }
6415 : else
6416 : {
6417 0 : e = z.x/z.y;
6418 0 : f = z.y+z.x*e;
6419 0 : result.x = (y+x*e)/f;
6420 0 : result.y = (-x+y*e)/f;
6421 : }
6422 0 : *this = result;
6423 0 : return *this;
6424 : }
6425 :
6426 0 : alglib_impl::ae_complex* alglib::complex::c_ptr()
6427 : {
6428 0 : return (alglib_impl::ae_complex*)this;
6429 : }
6430 :
6431 0 : const alglib_impl::ae_complex* alglib::complex::c_ptr() const
6432 : {
6433 0 : return (const alglib_impl::ae_complex*)this;
6434 : }
6435 :
6436 : #if !defined(AE_NO_EXCEPTIONS)
6437 0 : std::string alglib::complex::tostring(int _dps) const
6438 : {
6439 : char mask[32];
6440 : char buf_x[32];
6441 : char buf_y[32];
6442 : char buf_zero[32];
6443 0 : int dps = _dps>=0 ? _dps : -_dps;
6444 0 : if( dps<=0 || dps>=20 )
6445 0 : _ALGLIB_CPP_EXCEPTION("complex::tostring(): incorrect dps");
6446 :
6447 : // handle IEEE special quantities
6448 0 : if( fp_isnan(x) || fp_isnan(y) )
6449 0 : return "NAN";
6450 0 : if( fp_isinf(x) || fp_isinf(y) )
6451 0 : return "INF";
6452 :
6453 : // generate mask
6454 0 : if( sprintf(mask, "%%.%d%s", dps, _dps>=0 ? "f" : "e")>=(int)sizeof(mask) )
6455 0 : _ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
6456 :
6457 : // print |x|, |y| and zero with same mask and compare
6458 0 : if( sprintf(buf_x, mask, (double)(fabs(x)))>=(int)sizeof(buf_x) )
6459 0 : _ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
6460 0 : if( sprintf(buf_y, mask, (double)(fabs(y)))>=(int)sizeof(buf_y) )
6461 0 : _ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
6462 0 : if( sprintf(buf_zero, mask, (double)0)>=(int)sizeof(buf_zero) )
6463 0 : _ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
6464 :
6465 : // different zero/nonzero patterns
6466 0 : if( strcmp(buf_x,buf_zero)!=0 && strcmp(buf_y,buf_zero)!=0 )
6467 0 : return std::string(x>0 ? "" : "-")+buf_x+(y>0 ? "+" : "-")+buf_y+"i";
6468 0 : if( strcmp(buf_x,buf_zero)!=0 && strcmp(buf_y,buf_zero)==0 )
6469 0 : return std::string(x>0 ? "" : "-")+buf_x;
6470 0 : if( strcmp(buf_x,buf_zero)==0 && strcmp(buf_y,buf_zero)!=0 )
6471 0 : return std::string(y>0 ? "" : "-")+buf_y+"i";
6472 0 : return std::string("0");
6473 : }
6474 : #endif
6475 :
6476 0 : bool alglib::operator==(const alglib::complex& lhs, const alglib::complex& rhs)
6477 : {
6478 0 : volatile double x1 = lhs.x;
6479 0 : volatile double x2 = rhs.x;
6480 0 : volatile double y1 = lhs.y;
6481 0 : volatile double y2 = rhs.y;
6482 0 : return x1==x2 && y1==y2;
6483 : }
6484 :
6485 0 : bool alglib::operator!=(const alglib::complex& lhs, const alglib::complex& rhs)
6486 0 : { return !(lhs==rhs); }
6487 :
6488 0 : const alglib::complex alglib::operator+(const alglib::complex& lhs)
6489 0 : { return lhs; }
6490 :
6491 0 : const alglib::complex alglib::operator-(const alglib::complex& lhs)
6492 0 : { return alglib::complex(-lhs.x, -lhs.y); }
6493 :
6494 0 : const alglib::complex alglib::operator+(const alglib::complex& lhs, const alglib::complex& rhs)
6495 0 : { alglib::complex r = lhs; r += rhs; return r; }
6496 :
6497 0 : const alglib::complex alglib::operator+(const alglib::complex& lhs, const double& rhs)
6498 0 : { alglib::complex r = lhs; r += rhs; return r; }
6499 :
6500 0 : const alglib::complex alglib::operator+(const double& lhs, const alglib::complex& rhs)
6501 0 : { alglib::complex r = rhs; r += lhs; return r; }
6502 :
6503 0 : const alglib::complex alglib::operator-(const alglib::complex& lhs, const alglib::complex& rhs)
6504 0 : { alglib::complex r = lhs; r -= rhs; return r; }
6505 :
6506 0 : const alglib::complex alglib::operator-(const alglib::complex& lhs, const double& rhs)
6507 0 : { alglib::complex r = lhs; r -= rhs; return r; }
6508 :
6509 0 : const alglib::complex alglib::operator-(const double& lhs, const alglib::complex& rhs)
6510 0 : { alglib::complex r = lhs; r -= rhs; return r; }
6511 :
6512 0 : const alglib::complex alglib::operator*(const alglib::complex& lhs, const alglib::complex& rhs)
6513 0 : { return alglib::complex(lhs.x*rhs.x - lhs.y*rhs.y, lhs.x*rhs.y + lhs.y*rhs.x); }
6514 :
6515 0 : const alglib::complex alglib::operator*(const alglib::complex& lhs, const double& rhs)
6516 0 : { return alglib::complex(lhs.x*rhs, lhs.y*rhs); }
6517 :
6518 0 : const alglib::complex alglib::operator*(const double& lhs, const alglib::complex& rhs)
6519 0 : { return alglib::complex(lhs*rhs.x, lhs*rhs.y); }
6520 :
6521 0 : const alglib::complex alglib::operator/(const alglib::complex& lhs, const alglib::complex& rhs)
6522 : {
6523 0 : alglib::complex result;
6524 : double e;
6525 : double f;
6526 0 : if( fabs(rhs.y)<fabs(rhs.x) )
6527 : {
6528 0 : e = rhs.y/rhs.x;
6529 0 : f = rhs.x+rhs.y*e;
6530 0 : result.x = (lhs.x+lhs.y*e)/f;
6531 0 : result.y = (lhs.y-lhs.x*e)/f;
6532 : }
6533 : else
6534 : {
6535 0 : e = rhs.x/rhs.y;
6536 0 : f = rhs.y+rhs.x*e;
6537 0 : result.x = (lhs.y+lhs.x*e)/f;
6538 0 : result.y = (-lhs.x+lhs.y*e)/f;
6539 : }
6540 0 : return result;
6541 : }
6542 :
6543 0 : const alglib::complex alglib::operator/(const double& lhs, const alglib::complex& rhs)
6544 : {
6545 0 : alglib::complex result;
6546 : double e;
6547 : double f;
6548 0 : if( fabs(rhs.y)<fabs(rhs.x) )
6549 : {
6550 0 : e = rhs.y/rhs.x;
6551 0 : f = rhs.x+rhs.y*e;
6552 0 : result.x = lhs/f;
6553 0 : result.y = -lhs*e/f;
6554 : }
6555 : else
6556 : {
6557 0 : e = rhs.x/rhs.y;
6558 0 : f = rhs.y+rhs.x*e;
6559 0 : result.x = lhs*e/f;
6560 0 : result.y = -lhs/f;
6561 : }
6562 0 : return result;
6563 : }
6564 :
6565 0 : const alglib::complex alglib::operator/(const alglib::complex& lhs, const double& rhs)
6566 0 : { return alglib::complex(lhs.x/rhs, lhs.y/rhs); }
6567 :
6568 0 : double alglib::abscomplex(const alglib::complex &z)
6569 : {
6570 : double w;
6571 : double xabs;
6572 : double yabs;
6573 : double v;
6574 :
6575 0 : xabs = fabs(z.x);
6576 0 : yabs = fabs(z.y);
6577 0 : w = xabs>yabs ? xabs : yabs;
6578 0 : v = xabs<yabs ? xabs : yabs;
6579 0 : if( v==0 )
6580 0 : return w;
6581 : else
6582 : {
6583 0 : double t = v/w;
6584 0 : return w*sqrt(1+t*t);
6585 : }
6586 : }
6587 :
6588 0 : alglib::complex alglib::conj(const alglib::complex &z)
6589 0 : { return alglib::complex(z.x, -z.y); }
6590 :
6591 0 : alglib::complex alglib::csqr(const alglib::complex &z)
6592 0 : { return alglib::complex(z.x*z.x-z.y*z.y, 2*z.x*z.y); }
6593 :
6594 0 : void alglib::setnworkers(alglib::ae_int_t nworkers)
6595 : {
6596 : #ifdef AE_HPC
6597 : alglib_impl::ae_set_cores_to_use(nworkers);
6598 : #endif
6599 0 : }
6600 :
6601 0 : void alglib::setglobalthreading(const alglib::xparams settings)
6602 : {
6603 : #ifdef AE_HPC
6604 : alglib_impl::ae_set_global_threading(settings.flags);
6605 : #endif
6606 0 : }
6607 :
6608 0 : alglib::ae_int_t alglib::getnworkers()
6609 : {
6610 : #ifdef AE_HPC
6611 : return alglib_impl::ae_get_cores_to_use();
6612 : #else
6613 0 : return 1;
6614 : #endif
6615 : }
6616 :
6617 0 : alglib::ae_int_t alglib::_ae_cores_count()
6618 : {
6619 : #ifdef AE_HPC
6620 : return alglib_impl::ae_cores_count();
6621 : #else
6622 0 : return 1;
6623 : #endif
6624 : }
6625 :
6626 0 : void alglib::_ae_set_global_threading(alglib_impl::ae_uint64_t flg_value)
6627 : {
6628 : #ifdef AE_HPC
6629 : alglib_impl::ae_set_global_threading(flg_value);
6630 : #endif
6631 0 : }
6632 :
6633 0 : alglib_impl::ae_uint64_t alglib::_ae_get_global_threading()
6634 : {
6635 : #ifdef AE_HPC
6636 : return alglib_impl::ae_get_global_threading();
6637 : #else
6638 0 : return _ALGLIB_FLG_THREADING_SERIAL;
6639 : #endif
6640 : }
6641 :
6642 :
6643 : /********************************************************************
6644 : Level 1 BLAS functions
6645 : ********************************************************************/
6646 0 : double alglib::vdotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n)
6647 : {
6648 0 : double result = 0;
6649 : ae_int_t i;
6650 0 : if( stride0!=1 || stride1!=1 )
6651 : {
6652 : //
6653 : // slow general code
6654 : //
6655 0 : for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
6656 0 : result += (*v0)*(*v1);
6657 : }
6658 : else
6659 : {
6660 : //
6661 : // optimized code for stride=1
6662 : //
6663 0 : ae_int_t n4 = n/4;
6664 0 : ae_int_t nleft = n%4;
6665 0 : for(i=0; i<n4; i++, v0+=4, v1+=4)
6666 0 : result += v0[0]*v1[0]+v0[1]*v1[1]+v0[2]*v1[2]+v0[3]*v1[3];
6667 0 : for(i=0; i<nleft; i++, v0++, v1++)
6668 0 : result += v0[0]*v1[0];
6669 : }
6670 0 : return result;
6671 : }
6672 :
6673 0 : double alglib::vdotproduct(const double *v1, const double *v2, ae_int_t N)
6674 : {
6675 0 : return vdotproduct(v1, 1, v2, 1, N);
6676 : }
6677 :
6678 0 : alglib::complex alglib::vdotproduct(const alglib::complex *v0, ae_int_t stride0, const char *conj0, const alglib::complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n)
6679 : {
6680 0 : double rx = 0, ry = 0;
6681 : ae_int_t i;
6682 0 : bool bconj0 = !((conj0[0]=='N') || (conj0[0]=='n'));
6683 0 : bool bconj1 = !((conj1[0]=='N') || (conj1[0]=='n'));
6684 0 : if( bconj0 && bconj1 )
6685 : {
6686 : double v0x, v0y, v1x, v1y;
6687 0 : for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
6688 : {
6689 0 : v0x = v0->x;
6690 0 : v0y = -v0->y;
6691 0 : v1x = v1->x;
6692 0 : v1y = -v1->y;
6693 0 : rx += v0x*v1x-v0y*v1y;
6694 0 : ry += v0x*v1y+v0y*v1x;
6695 : }
6696 : }
6697 0 : if( !bconj0 && bconj1 )
6698 : {
6699 : double v0x, v0y, v1x, v1y;
6700 0 : for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
6701 : {
6702 0 : v0x = v0->x;
6703 0 : v0y = v0->y;
6704 0 : v1x = v1->x;
6705 0 : v1y = -v1->y;
6706 0 : rx += v0x*v1x-v0y*v1y;
6707 0 : ry += v0x*v1y+v0y*v1x;
6708 : }
6709 : }
6710 0 : if( bconj0 && !bconj1 )
6711 : {
6712 : double v0x, v0y, v1x, v1y;
6713 0 : for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
6714 : {
6715 0 : v0x = v0->x;
6716 0 : v0y = -v0->y;
6717 0 : v1x = v1->x;
6718 0 : v1y = v1->y;
6719 0 : rx += v0x*v1x-v0y*v1y;
6720 0 : ry += v0x*v1y+v0y*v1x;
6721 : }
6722 : }
6723 0 : if( !bconj0 && !bconj1 )
6724 : {
6725 : double v0x, v0y, v1x, v1y;
6726 0 : for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
6727 : {
6728 0 : v0x = v0->x;
6729 0 : v0y = v0->y;
6730 0 : v1x = v1->x;
6731 0 : v1y = v1->y;
6732 0 : rx += v0x*v1x-v0y*v1y;
6733 0 : ry += v0x*v1y+v0y*v1x;
6734 : }
6735 : }
6736 0 : return alglib::complex(rx,ry);
6737 : }
6738 :
6739 0 : alglib::complex alglib::vdotproduct(const alglib::complex *v1, const alglib::complex *v2, ae_int_t N)
6740 : {
6741 0 : return vdotproduct(v1, 1, "N", v2, 1, "N", N);
6742 : }
6743 :
6744 0 : void alglib::vmove(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n)
6745 : {
6746 : ae_int_t i;
6747 0 : if( stride_dst!=1 || stride_src!=1 )
6748 : {
6749 : //
6750 : // general unoptimized case
6751 : //
6752 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6753 0 : *vdst = *vsrc;
6754 : }
6755 : else
6756 : {
6757 : //
6758 : // optimized case
6759 : //
6760 0 : ae_int_t n2 = n/2;
6761 0 : for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6762 : {
6763 0 : vdst[0] = vsrc[0];
6764 0 : vdst[1] = vsrc[1];
6765 : }
6766 0 : if( n%2!=0 )
6767 0 : vdst[0] = vsrc[0];
6768 : }
6769 0 : }
6770 :
6771 0 : void alglib::vmove(double *vdst, const double* vsrc, ae_int_t N)
6772 : {
6773 0 : vmove(vdst, 1, vsrc, 1, N);
6774 0 : }
6775 :
6776 0 : void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
6777 : {
6778 0 : bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
6779 : ae_int_t i;
6780 0 : if( stride_dst!=1 || stride_src!=1 )
6781 : {
6782 : //
6783 : // general unoptimized case
6784 : //
6785 0 : if( bconj )
6786 : {
6787 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6788 : {
6789 0 : vdst->x = vsrc->x;
6790 0 : vdst->y = -vsrc->y;
6791 : }
6792 : }
6793 : else
6794 : {
6795 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6796 0 : *vdst = *vsrc;
6797 : }
6798 : }
6799 : else
6800 : {
6801 : //
6802 : // optimized case
6803 : //
6804 0 : if( bconj )
6805 : {
6806 0 : for(i=0; i<n; i++, vdst++, vsrc++)
6807 : {
6808 0 : vdst->x = vsrc->x;
6809 0 : vdst->y = -vsrc->y;
6810 : }
6811 : }
6812 : else
6813 : {
6814 0 : for(i=0; i<n; i++, vdst++, vsrc++)
6815 0 : *vdst = *vsrc;
6816 : }
6817 : }
6818 0 : }
6819 :
6820 0 : void alglib::vmove(alglib::complex *vdst, const alglib::complex* vsrc, ae_int_t N)
6821 : {
6822 0 : vmove(vdst, 1, vsrc, 1, "N", N);
6823 0 : }
6824 :
6825 0 : void alglib::vmoveneg(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n)
6826 : {
6827 : ae_int_t i;
6828 0 : if( stride_dst!=1 || stride_src!=1 )
6829 : {
6830 : //
6831 : // general unoptimized case
6832 : //
6833 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6834 0 : *vdst = -*vsrc;
6835 : }
6836 : else
6837 : {
6838 : //
6839 : // optimized case
6840 : //
6841 0 : ae_int_t n2 = n/2;
6842 0 : for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6843 : {
6844 0 : vdst[0] = -vsrc[0];
6845 0 : vdst[1] = -vsrc[1];
6846 : }
6847 0 : if( n%2!=0 )
6848 0 : vdst[0] = -vsrc[0];
6849 : }
6850 0 : }
6851 :
6852 0 : void alglib::vmoveneg(double *vdst, const double *vsrc, ae_int_t N)
6853 : {
6854 0 : vmoveneg(vdst, 1, vsrc, 1, N);
6855 0 : }
6856 :
6857 0 : void alglib::vmoveneg(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
6858 : {
6859 0 : bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
6860 : ae_int_t i;
6861 0 : if( stride_dst!=1 || stride_src!=1 )
6862 : {
6863 : //
6864 : // general unoptimized case
6865 : //
6866 0 : if( bconj )
6867 : {
6868 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6869 : {
6870 0 : vdst->x = -vsrc->x;
6871 0 : vdst->y = vsrc->y;
6872 : }
6873 : }
6874 : else
6875 : {
6876 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6877 : {
6878 0 : vdst->x = -vsrc->x;
6879 0 : vdst->y = -vsrc->y;
6880 : }
6881 : }
6882 : }
6883 : else
6884 : {
6885 : //
6886 : // optimized case
6887 : //
6888 0 : if( bconj )
6889 : {
6890 0 : for(i=0; i<n; i++, vdst++, vsrc++)
6891 : {
6892 0 : vdst->x = -vsrc->x;
6893 0 : vdst->y = vsrc->y;
6894 : }
6895 : }
6896 : else
6897 : {
6898 0 : for(i=0; i<n; i++, vdst++, vsrc++)
6899 : {
6900 0 : vdst->x = -vsrc->x;
6901 0 : vdst->y = -vsrc->y;
6902 : }
6903 : }
6904 : }
6905 0 : }
6906 :
6907 0 : void alglib::vmoveneg(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
6908 : {
6909 0 : vmoveneg(vdst, 1, vsrc, 1, "N", N);
6910 0 : }
6911 :
6912 0 : void alglib::vmove(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
6913 : {
6914 : ae_int_t i;
6915 0 : if( stride_dst!=1 || stride_src!=1 )
6916 : {
6917 : //
6918 : // general unoptimized case
6919 : //
6920 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6921 0 : *vdst = alpha*(*vsrc);
6922 : }
6923 : else
6924 : {
6925 : //
6926 : // optimized case
6927 : //
6928 0 : ae_int_t n2 = n/2;
6929 0 : for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6930 : {
6931 0 : vdst[0] = alpha*vsrc[0];
6932 0 : vdst[1] = alpha*vsrc[1];
6933 : }
6934 0 : if( n%2!=0 )
6935 0 : vdst[0] = alpha*vsrc[0];
6936 : }
6937 0 : }
6938 :
6939 0 : void alglib::vmove(double *vdst, const double *vsrc, ae_int_t N, double alpha)
6940 : {
6941 0 : vmove(vdst, 1, vsrc, 1, N, alpha);
6942 0 : }
6943 :
6944 0 : void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
6945 : {
6946 0 : bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
6947 : ae_int_t i;
6948 0 : if( stride_dst!=1 || stride_src!=1 )
6949 : {
6950 : //
6951 : // general unoptimized case
6952 : //
6953 0 : if( bconj )
6954 : {
6955 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6956 : {
6957 0 : vdst->x = alpha*vsrc->x;
6958 0 : vdst->y = -alpha*vsrc->y;
6959 : }
6960 : }
6961 : else
6962 : {
6963 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6964 : {
6965 0 : vdst->x = alpha*vsrc->x;
6966 0 : vdst->y = alpha*vsrc->y;
6967 : }
6968 : }
6969 : }
6970 : else
6971 : {
6972 : //
6973 : // optimized case
6974 : //
6975 0 : if( bconj )
6976 : {
6977 0 : for(i=0; i<n; i++, vdst++, vsrc++)
6978 : {
6979 0 : vdst->x = alpha*vsrc->x;
6980 0 : vdst->y = -alpha*vsrc->y;
6981 : }
6982 : }
6983 : else
6984 : {
6985 0 : for(i=0; i<n; i++, vdst++, vsrc++)
6986 : {
6987 0 : vdst->x = alpha*vsrc->x;
6988 0 : vdst->y = alpha*vsrc->y;
6989 : }
6990 : }
6991 : }
6992 0 : }
6993 :
6994 0 : void alglib::vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha)
6995 : {
6996 0 : vmove(vdst, 1, vsrc, 1, "N", N, alpha);
6997 0 : }
6998 :
6999 0 : void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
7000 : {
7001 0 : bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7002 : ae_int_t i;
7003 0 : if( stride_dst!=1 || stride_src!=1 )
7004 : {
7005 : //
7006 : // general unoptimized case
7007 : //
7008 0 : if( bconj )
7009 : {
7010 0 : double ax = alpha.x, ay = alpha.y;
7011 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7012 : {
7013 0 : vdst->x = ax*vsrc->x+ay*vsrc->y;
7014 0 : vdst->y = -ax*vsrc->y+ay*vsrc->x;
7015 : }
7016 : }
7017 : else
7018 : {
7019 0 : double ax = alpha.x, ay = alpha.y;
7020 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7021 : {
7022 0 : vdst->x = ax*vsrc->x-ay*vsrc->y;
7023 0 : vdst->y = ax*vsrc->y+ay*vsrc->x;
7024 : }
7025 : }
7026 0 : }
7027 : else
7028 : {
7029 : //
7030 : // optimized case
7031 : //
7032 0 : if( bconj )
7033 : {
7034 0 : double ax = alpha.x, ay = alpha.y;
7035 0 : for(i=0; i<n; i++, vdst++, vsrc++)
7036 : {
7037 0 : vdst->x = ax*vsrc->x+ay*vsrc->y;
7038 0 : vdst->y = -ax*vsrc->y+ay*vsrc->x;
7039 : }
7040 : }
7041 : else
7042 : {
7043 0 : double ax = alpha.x, ay = alpha.y;
7044 0 : for(i=0; i<n; i++, vdst++, vsrc++)
7045 : {
7046 0 : vdst->x = ax*vsrc->x-ay*vsrc->y;
7047 0 : vdst->y = ax*vsrc->y+ay*vsrc->x;
7048 : }
7049 : }
7050 : }
7051 0 : }
7052 :
7053 0 : void alglib::vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha)
7054 : {
7055 0 : vmove(vdst, 1, vsrc, 1, "N", N, alpha);
7056 0 : }
7057 :
7058 0 : void alglib::vadd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
7059 : {
7060 : ae_int_t i;
7061 0 : if( stride_dst!=1 || stride_src!=1 )
7062 : {
7063 : //
7064 : // general unoptimized case
7065 : //
7066 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7067 0 : *vdst += *vsrc;
7068 : }
7069 : else
7070 : {
7071 : //
7072 : // optimized case
7073 : //
7074 0 : ae_int_t n2 = n/2;
7075 0 : for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
7076 : {
7077 0 : vdst[0] += vsrc[0];
7078 0 : vdst[1] += vsrc[1];
7079 : }
7080 0 : if( n%2!=0 )
7081 0 : vdst[0] += vsrc[0];
7082 : }
7083 0 : }
7084 :
7085 0 : void alglib::vadd(double *vdst, const double *vsrc, ae_int_t N)
7086 : {
7087 0 : vadd(vdst, 1, vsrc, 1, N);
7088 0 : }
7089 :
7090 0 : void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
7091 : {
7092 0 : bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7093 : ae_int_t i;
7094 0 : if( stride_dst!=1 || stride_src!=1 )
7095 : {
7096 : //
7097 : // general unoptimized case
7098 : //
7099 0 : if( bconj )
7100 : {
7101 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7102 : {
7103 0 : vdst->x += vsrc->x;
7104 0 : vdst->y -= vsrc->y;
7105 : }
7106 : }
7107 : else
7108 : {
7109 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7110 : {
7111 0 : vdst->x += vsrc->x;
7112 0 : vdst->y += vsrc->y;
7113 : }
7114 : }
7115 : }
7116 : else
7117 : {
7118 : //
7119 : // optimized case
7120 : //
7121 0 : if( bconj )
7122 : {
7123 0 : for(i=0; i<n; i++, vdst++, vsrc++)
7124 : {
7125 0 : vdst->x += vsrc->x;
7126 0 : vdst->y -= vsrc->y;
7127 : }
7128 : }
7129 : else
7130 : {
7131 0 : for(i=0; i<n; i++, vdst++, vsrc++)
7132 : {
7133 0 : vdst->x += vsrc->x;
7134 0 : vdst->y += vsrc->y;
7135 : }
7136 : }
7137 : }
7138 0 : }
7139 :
7140 0 : void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
7141 : {
7142 0 : vadd(vdst, 1, vsrc, 1, "N", N);
7143 0 : }
7144 :
7145 0 : void alglib::vadd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
7146 : {
7147 : ae_int_t i;
7148 0 : if( stride_dst!=1 || stride_src!=1 )
7149 : {
7150 : //
7151 : // general unoptimized case
7152 : //
7153 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7154 0 : *vdst += alpha*(*vsrc);
7155 : }
7156 : else
7157 : {
7158 : //
7159 : // optimized case
7160 : //
7161 0 : ae_int_t n2 = n/2;
7162 0 : for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
7163 : {
7164 0 : vdst[0] += alpha*vsrc[0];
7165 0 : vdst[1] += alpha*vsrc[1];
7166 : }
7167 0 : if( n%2!=0 )
7168 0 : vdst[0] += alpha*vsrc[0];
7169 : }
7170 0 : }
7171 :
7172 0 : void alglib::vadd(double *vdst, const double *vsrc, ae_int_t N, double alpha)
7173 : {
7174 0 : vadd(vdst, 1, vsrc, 1, N, alpha);
7175 0 : }
7176 :
7177 0 : void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
7178 : {
7179 0 : bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7180 : ae_int_t i;
7181 0 : if( stride_dst!=1 || stride_src!=1 )
7182 : {
7183 : //
7184 : // general unoptimized case
7185 : //
7186 0 : if( bconj )
7187 : {
7188 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7189 : {
7190 0 : vdst->x += alpha*vsrc->x;
7191 0 : vdst->y -= alpha*vsrc->y;
7192 : }
7193 : }
7194 : else
7195 : {
7196 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7197 : {
7198 0 : vdst->x += alpha*vsrc->x;
7199 0 : vdst->y += alpha*vsrc->y;
7200 : }
7201 : }
7202 : }
7203 : else
7204 : {
7205 : //
7206 : // optimized case
7207 : //
7208 0 : if( bconj )
7209 : {
7210 0 : for(i=0; i<n; i++, vdst++, vsrc++)
7211 : {
7212 0 : vdst->x += alpha*vsrc->x;
7213 0 : vdst->y -= alpha*vsrc->y;
7214 : }
7215 : }
7216 : else
7217 : {
7218 0 : for(i=0; i<n; i++, vdst++, vsrc++)
7219 : {
7220 0 : vdst->x += alpha*vsrc->x;
7221 0 : vdst->y += alpha*vsrc->y;
7222 : }
7223 : }
7224 : }
7225 0 : }
7226 :
7227 0 : void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha)
7228 : {
7229 0 : vadd(vdst, 1, vsrc, 1, "N", N, alpha);
7230 0 : }
7231 :
7232 0 : void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
7233 : {
7234 0 : bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7235 : ae_int_t i;
7236 0 : if( stride_dst!=1 || stride_src!=1 )
7237 : {
7238 : //
7239 : // general unoptimized case
7240 : //
7241 0 : double ax = alpha.x, ay = alpha.y;
7242 0 : if( bconj )
7243 : {
7244 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7245 : {
7246 0 : vdst->x += ax*vsrc->x+ay*vsrc->y;
7247 0 : vdst->y -= ax*vsrc->y-ay*vsrc->x;
7248 : }
7249 : }
7250 : else
7251 : {
7252 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7253 : {
7254 0 : vdst->x += ax*vsrc->x-ay*vsrc->y;
7255 0 : vdst->y += ax*vsrc->y+ay*vsrc->x;
7256 : }
7257 : }
7258 0 : }
7259 : else
7260 : {
7261 : //
7262 : // optimized case
7263 : //
7264 0 : double ax = alpha.x, ay = alpha.y;
7265 0 : if( bconj )
7266 : {
7267 0 : for(i=0; i<n; i++, vdst++, vsrc++)
7268 : {
7269 0 : vdst->x += ax*vsrc->x+ay*vsrc->y;
7270 0 : vdst->y -= ax*vsrc->y-ay*vsrc->x;
7271 : }
7272 : }
7273 : else
7274 : {
7275 0 : for(i=0; i<n; i++, vdst++, vsrc++)
7276 : {
7277 0 : vdst->x += ax*vsrc->x-ay*vsrc->y;
7278 0 : vdst->y += ax*vsrc->y+ay*vsrc->x;
7279 : }
7280 : }
7281 : }
7282 0 : }
7283 :
7284 0 : void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha)
7285 : {
7286 0 : vadd(vdst, 1, vsrc, 1, "N", N, alpha);
7287 0 : }
7288 :
7289 0 : void alglib::vsub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
7290 : {
7291 : ae_int_t i;
7292 0 : if( stride_dst!=1 || stride_src!=1 )
7293 : {
7294 : //
7295 : // general unoptimized case
7296 : //
7297 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7298 0 : *vdst -= *vsrc;
7299 : }
7300 : else
7301 : {
7302 : //
7303 : // optimized case
7304 : //
7305 0 : ae_int_t n2 = n/2;
7306 0 : for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
7307 : {
7308 0 : vdst[0] -= vsrc[0];
7309 0 : vdst[1] -= vsrc[1];
7310 : }
7311 0 : if( n%2!=0 )
7312 0 : vdst[0] -= vsrc[0];
7313 : }
7314 0 : }
7315 :
7316 0 : void alglib::vsub(double *vdst, const double *vsrc, ae_int_t N)
7317 : {
7318 0 : vsub(vdst, 1, vsrc, 1, N);
7319 0 : }
7320 :
7321 0 : void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
7322 : {
7323 0 : bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7324 : ae_int_t i;
7325 0 : if( stride_dst!=1 || stride_src!=1 )
7326 : {
7327 : //
7328 : // general unoptimized case
7329 : //
7330 0 : if( bconj )
7331 : {
7332 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7333 : {
7334 0 : vdst->x -= vsrc->x;
7335 0 : vdst->y += vsrc->y;
7336 : }
7337 : }
7338 : else
7339 : {
7340 0 : for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7341 : {
7342 0 : vdst->x -= vsrc->x;
7343 0 : vdst->y -= vsrc->y;
7344 : }
7345 : }
7346 : }
7347 : else
7348 : {
7349 : //
7350 : // optimized case
7351 : //
7352 0 : if( bconj )
7353 : {
7354 0 : for(i=0; i<n; i++, vdst++, vsrc++)
7355 : {
7356 0 : vdst->x -= vsrc->x;
7357 0 : vdst->y += vsrc->y;
7358 : }
7359 : }
7360 : else
7361 : {
7362 0 : for(i=0; i<n; i++, vdst++, vsrc++)
7363 : {
7364 0 : vdst->x -= vsrc->x;
7365 0 : vdst->y -= vsrc->y;
7366 : }
7367 : }
7368 : }
7369 0 : }
7370 :
7371 0 : void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
7372 : {
7373 0 : vsub(vdst, 1, vsrc, 1, "N", N);
7374 0 : }
7375 :
7376 0 : void alglib::vsub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
7377 : {
7378 0 : vadd(vdst, stride_dst, vsrc, stride_src, n, -alpha);
7379 0 : }
7380 :
7381 0 : void alglib::vsub(double *vdst, const double *vsrc, ae_int_t N, double alpha)
7382 : {
7383 0 : vadd(vdst, 1, vsrc, 1, N, -alpha);
7384 0 : }
7385 :
7386 0 : void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
7387 : {
7388 0 : vadd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
7389 0 : }
7390 :
7391 0 : void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t n, double alpha)
7392 : {
7393 0 : vadd(vdst, 1, vsrc, 1, "N", n, -alpha);
7394 0 : }
7395 :
7396 0 : void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
7397 : {
7398 0 : vadd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
7399 0 : }
7400 :
7401 0 : void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t n, alglib::complex alpha)
7402 : {
7403 0 : vadd(vdst, 1, vsrc, 1, "N", n, -alpha);
7404 0 : }
7405 0 : void alglib::vmul(double *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
7406 : {
7407 : ae_int_t i;
7408 0 : if( stride_dst!=1 )
7409 : {
7410 : //
7411 : // general unoptimized case
7412 : //
7413 0 : for(i=0; i<n; i++, vdst+=stride_dst)
7414 0 : *vdst *= alpha;
7415 : }
7416 : else
7417 : {
7418 : //
7419 : // optimized case
7420 : //
7421 0 : for(i=0; i<n; i++, vdst++)
7422 0 : *vdst *= alpha;
7423 : }
7424 0 : }
7425 :
7426 0 : void alglib::vmul(double *vdst, ae_int_t N, double alpha)
7427 : {
7428 0 : vmul(vdst, 1, N, alpha);
7429 0 : }
7430 :
7431 0 : void alglib::vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
7432 : {
7433 : ae_int_t i;
7434 0 : if( stride_dst!=1 )
7435 : {
7436 : //
7437 : // general unoptimized case
7438 : //
7439 0 : for(i=0; i<n; i++, vdst+=stride_dst)
7440 : {
7441 0 : vdst->x *= alpha;
7442 0 : vdst->y *= alpha;
7443 : }
7444 : }
7445 : else
7446 : {
7447 : //
7448 : // optimized case
7449 : //
7450 0 : for(i=0; i<n; i++, vdst++)
7451 : {
7452 0 : vdst->x *= alpha;
7453 0 : vdst->y *= alpha;
7454 : }
7455 : }
7456 0 : }
7457 :
7458 0 : void alglib::vmul(alglib::complex *vdst, ae_int_t N, double alpha)
7459 : {
7460 0 : vmul(vdst, 1, N, alpha);
7461 0 : }
7462 :
7463 0 : void alglib::vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, alglib::complex alpha)
7464 : {
7465 : ae_int_t i;
7466 0 : if( stride_dst!=1 )
7467 : {
7468 : //
7469 : // general unoptimized case
7470 : //
7471 0 : double ax = alpha.x, ay = alpha.y;
7472 0 : for(i=0; i<n; i++, vdst+=stride_dst)
7473 : {
7474 0 : double dstx = vdst->x, dsty = vdst->y;
7475 0 : vdst->x = ax*dstx-ay*dsty;
7476 0 : vdst->y = ax*dsty+ay*dstx;
7477 : }
7478 : }
7479 : else
7480 : {
7481 : //
7482 : // optimized case
7483 : //
7484 0 : double ax = alpha.x, ay = alpha.y;
7485 0 : for(i=0; i<n; i++, vdst++)
7486 : {
7487 0 : double dstx = vdst->x, dsty = vdst->y;
7488 0 : vdst->x = ax*dstx-ay*dsty;
7489 0 : vdst->y = ax*dsty+ay*dstx;
7490 : }
7491 : }
7492 0 : }
7493 :
7494 0 : void alglib::vmul(alglib::complex *vdst, ae_int_t N, alglib::complex alpha)
7495 : {
7496 0 : vmul(vdst, 1, N, alpha);
7497 0 : }
7498 :
7499 0 : alglib::ae_int_t alglib::vlen(ae_int_t n1, ae_int_t n2)
7500 : {
7501 0 : return n2-n1+1;
7502 : }
7503 :
7504 :
7505 : /********************************************************************
7506 : Matrices and vectors
7507 : ********************************************************************/
7508 0 : alglib::ae_vector_wrapper::ae_vector_wrapper(alglib_impl::ae_vector *e_ptr, alglib_impl::ae_datatype datatype)
7509 : {
7510 0 : if( e_ptr==NULL || e_ptr->datatype!=datatype )
7511 : {
7512 0 : const char *msg = "ALGLIB: ae_vector_wrapper datatype check failed";
7513 : #if !defined(AE_NO_EXCEPTIONS)
7514 0 : _ALGLIB_CPP_EXCEPTION(msg);
7515 : #else
7516 : ptr = NULL;
7517 : is_frozen_proxy = false;
7518 : _ALGLIB_SET_ERROR_FLAG(msg);
7519 : return;
7520 : #endif
7521 : }
7522 0 : ptr = e_ptr;
7523 0 : is_frozen_proxy = true;
7524 0 : }
7525 :
// Constructs an empty (zero-length) vector of the given datatype that uses
// the wrapper's own inner_vec as storage; the result is not a frozen proxy.
// Errors are delivered through the setjmp() target installed below.
7526 0 : alglib::ae_vector_wrapper::ae_vector_wrapper(alglib_impl::ae_datatype datatype)
7527 : {
7528 : jmp_buf _break_jump;
7529 : alglib_impl::ae_state _state;
7530 :
7531 0 : alglib_impl::ae_state_init(&_state);
// Error path: entered when setjmp() returns nonzero (presumably after a
// longjmp issued by the ALGLIB core through _state - confirm in ae_state code).
7532 0 : if( setjmp(_break_jump) )
7533 : {
7534 : #if !defined(AE_NO_EXCEPTIONS)
7535 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7536 : #else
// exception-free build: leave the wrapper detached and raise the error flag
7537 : ptr = NULL;
7538 : is_frozen_proxy = false;
7539 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7540 : return;
7541 : #endif
7542 : }
7543 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7544 0 : ptr = &inner_vec;
7545 0 : is_frozen_proxy = false;
// inner_vec is zero-filled before ae_vector_init() is applied to it
7546 0 : memset(ptr, 0, sizeof(*ptr));
7547 0 : ae_vector_init(ptr, 0, datatype, &_state, ae_false);
7548 0 : ae_state_clear(&_state);
7549 0 : }
7550 :
// Copy-style constructor: initializes inner_vec as a copy of rhs via
// ae_vector_init_copy(). rhs must be initialized (rhs.ptr!=NULL) and hold
// the expected datatype; both conditions are checked with ae_assert().
// The result is not a frozen proxy.
7551 0 : alglib::ae_vector_wrapper::ae_vector_wrapper(const ae_vector_wrapper &rhs, alglib_impl::ae_datatype datatype)
7552 : {
7553 : jmp_buf _break_jump;
7554 : alglib_impl::ae_state _state;
7555 :
7556 0 : alglib_impl::ae_state_init(&_state);
// Error path: entered when setjmp() returns nonzero (presumably after a
// longjmp triggered by a failed ae_assert or by the copy - confirm).
7557 0 : if( setjmp(_break_jump) )
7558 : {
7559 : #if !defined(AE_NO_EXCEPTIONS)
7560 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7561 : #else
// exception-free build: leave the wrapper detached and raise the error flag
7562 : ptr = NULL;
7563 : is_frozen_proxy = false;
7564 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7565 : return;
7566 : #endif
7567 : }
7568 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
// preconditions are verified before any member of *this is written
7569 0 : alglib_impl::ae_assert(rhs.ptr!=NULL, "ALGLIB: ae_vector_wrapper source is not initialized", &_state);
7570 0 : alglib_impl::ae_assert(rhs.ptr->datatype==datatype, "ALGLIB: ae_vector_wrapper datatype check failed", &_state);
7571 0 : ptr = &inner_vec;
7572 0 : is_frozen_proxy = false;
// inner_vec is zero-filled before ae_vector_init_copy() is applied to it
7573 0 : memset(ptr, 0, sizeof(*ptr));
7574 0 : ae_vector_init_copy(ptr, rhs.ptr, &_state, ae_false);
7575 0 : ae_state_clear(&_state);
7576 0 : }
7577 :
7578 0 : alglib::ae_vector_wrapper::~ae_vector_wrapper()
7579 : {
7580 0 : if( ptr==&inner_vec )
7581 0 : ae_vector_clear(ptr);
7582 0 : }
7583 :
// Resizes the wrapped vector to iLen elements via ae_vector_set_length().
// Fails (exception, or error flag when AE_NO_EXCEPTIONS is defined) when the
// wrapper is uninitialized (ptr==NULL) or is a frozen proxy.
7584 0 : void alglib::ae_vector_wrapper::setlength(ae_int_t iLen)
7585 : {
7586 : jmp_buf _break_jump;
7587 : alglib_impl::ae_state _state;
7588 0 : alglib_impl::ae_state_init(&_state);
// Error path: entered when setjmp() returns nonzero (presumably after a
// longjmp triggered by a failed ae_assert or by the resize - confirm).
7589 0 : if( setjmp(_break_jump) )
7590 : {
7591 : #if !defined(AE_NO_EXCEPTIONS)
7592 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7593 : #else
// exception-free build: members are left as they were, only the flag is set
7594 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7595 : return;
7596 : #endif
7597 : }
7598 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7599 0 : alglib_impl::ae_assert(ptr!=NULL, "ALGLIB: setlength() error, ptr==NULL (array was not correctly initialized)", &_state);
7600 0 : alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: setlength() error, ptr is frozen proxy array", &_state);
7601 0 : alglib_impl::ae_vector_set_length(ptr, iLen, &_state);
7602 0 : alglib_impl::ae_state_clear(&_state);
7603 0 : }
7604 :
7605 0 : alglib::ae_int_t alglib::ae_vector_wrapper::length() const
7606 : {
7607 0 : if( ptr==NULL )
7608 0 : return 0;
7609 0 : return ptr->cnt;
7610 : }
7611 :
7612 0 : void alglib::ae_vector_wrapper::attach_to(alglib_impl::x_vector *new_ptr, alglib_impl::ae_state *_state)
7613 : {
7614 0 : if( ptr==&inner_vec )
7615 0 : ae_vector_clear(ptr);
7616 0 : ptr = &inner_vec;
7617 0 : memset(ptr, 0, sizeof(*ptr));
7618 0 : ae_vector_init_attach_to_x(ptr, new_ptr, _state, ae_false);
7619 0 : is_frozen_proxy = true;
7620 0 : }
7621 :
// Copies the contents of rhs into this wrapper and returns *this.
// Self-assignment returns immediately. Both wrappers must be initialized and
// hold the same datatype. A frozen proxy additionally requires equal sizes;
// otherwise the destination is resized to rhs.ptr->cnt before the copy.
7622 0 : const alglib::ae_vector_wrapper& alglib::ae_vector_wrapper::assign(const alglib::ae_vector_wrapper &rhs)
7623 : {
7624 : jmp_buf _break_jump;
7625 : alglib_impl::ae_state _state;
7626 0 : if( this==&rhs )
7627 0 : return *this;
7628 0 : alglib_impl::ae_state_init(&_state);
// Error path: entered when setjmp() returns nonzero (presumably after a
// longjmp triggered by a failed ae_assert or by the resize - confirm).
7629 0 : if( setjmp(_break_jump) )
7630 : {
7631 : #if !defined(AE_NO_EXCEPTIONS)
7632 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7633 : #else
7634 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7635 : return *this;
7636 : #endif
7637 : }
7638 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7639 0 : ae_assert(ptr!=NULL, "ALGLIB: incorrect assignment (uninitialized destination)", &_state);
7640 0 : ae_assert(rhs.ptr!=NULL, "ALGLIB: incorrect assignment (uninitialized source)", &_state);
7641 0 : ae_assert(rhs.ptr->datatype==ptr->datatype, "ALGLIB: incorrect assignment to array (types do not match)", &_state);
// the size-equality assertion applies to frozen proxies only
7642 0 : if( is_frozen_proxy )
7643 0 : ae_assert(rhs.ptr->cnt==ptr->cnt, "ALGLIB: incorrect assignment to proxy array (sizes do not match)", &_state);
7644 0 : if( rhs.ptr->cnt!=ptr->cnt )
7645 0 : ae_vector_set_length(ptr, rhs.ptr->cnt, &_state);
// raw byte copy of ptr->cnt elements of ae_sizeof(datatype) bytes each
7646 0 : memcpy(ptr->ptr.p_ptr, rhs.ptr->ptr.p_ptr, ptr->cnt*alglib_impl::ae_sizeof(ptr->datatype));
7647 0 : alglib_impl::ae_state_clear(&_state);
7648 0 : return *this;
7649 : }
7650 :
7651 0 : const alglib_impl::ae_vector* alglib::ae_vector_wrapper::c_ptr() const
7652 : {
7653 0 : return ptr;
7654 : }
7655 :
7656 0 : alglib_impl::ae_vector* alglib::ae_vector_wrapper::c_ptr()
7657 : {
7658 0 : return ptr;
7659 : }
7660 :
7661 : #if !defined(AE_NO_EXCEPTIONS)
7662 0 : alglib::ae_vector_wrapper::ae_vector_wrapper(const char *s, alglib_impl::ae_datatype datatype)
7663 : {
7664 0 : std::vector<const char*> svec;
7665 : size_t i;
7666 0 : char *p = filter_spaces(s);
7667 0 : if( p==NULL )
7668 0 : _ALGLIB_CPP_EXCEPTION("ALGLIB: allocation error");
7669 : try
7670 : {
7671 0 : str_vector_create(p, true, &svec);
7672 : {
7673 : jmp_buf _break_jump;
7674 : alglib_impl::ae_state _state;
7675 0 : alglib_impl::ae_state_init(&_state);
7676 0 : if( setjmp(_break_jump) )
7677 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7678 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7679 0 : ptr = &inner_vec;
7680 0 : is_frozen_proxy = false;
7681 0 : memset(ptr, 0, sizeof(*ptr));
7682 0 : ae_vector_init(ptr, (ae_int_t)(svec.size()), datatype, &_state, ae_false);
7683 0 : ae_state_clear(&_state);
7684 : }
7685 0 : for(i=0; i<svec.size(); i++)
7686 : {
7687 0 : if( datatype==alglib_impl::DT_BOOL )
7688 0 : ptr->ptr.p_bool[i] = parse_bool_delim(svec[i],",]");
7689 0 : if( datatype==alglib_impl::DT_INT )
7690 0 : ptr->ptr.p_int[i] = parse_int_delim(svec[i],",]");
7691 0 : if( datatype==alglib_impl::DT_REAL )
7692 0 : ptr->ptr.p_double[i] = parse_real_delim(svec[i],",]");
7693 0 : if( datatype==alglib_impl::DT_COMPLEX )
7694 : {
7695 0 : alglib::complex t = parse_complex_delim(svec[i],",]");
7696 0 : ptr->ptr.p_complex[i].x = t.x;
7697 0 : ptr->ptr.p_complex[i].y = t.y;
7698 : }
7699 : }
7700 0 : alglib_impl::ae_free(p);
7701 : }
7702 0 : catch(...)
7703 : {
7704 0 : alglib_impl::ae_free(p);
7705 0 : throw;
7706 0 : }
7707 0 : }
7708 : #endif
7709 :
7710 0 : alglib::boolean_1d_array::boolean_1d_array():ae_vector_wrapper(alglib_impl::DT_BOOL)
7711 : {
7712 0 : }
7713 :
7714 0 : alglib::boolean_1d_array::boolean_1d_array(const alglib::boolean_1d_array &rhs):ae_vector_wrapper(rhs,alglib_impl::DT_BOOL)
7715 : {
7716 0 : }
7717 :
7718 0 : alglib::boolean_1d_array::boolean_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_BOOL)
7719 : {
7720 0 : }
7721 :
7722 0 : const alglib::boolean_1d_array& alglib::boolean_1d_array::operator=(const alglib::boolean_1d_array &rhs)
7723 : {
7724 0 : return static_cast<const alglib::boolean_1d_array&>(assign(rhs));
7725 : }
7726 :
7727 0 : alglib::boolean_1d_array::~boolean_1d_array()
7728 : {
7729 0 : }
7730 :
7731 0 : const ae_bool& alglib::boolean_1d_array::operator()(ae_int_t i) const
7732 : {
7733 0 : return ptr->ptr.p_bool[i];
7734 : }
7735 :
7736 0 : ae_bool& alglib::boolean_1d_array::operator()(ae_int_t i)
7737 : {
7738 0 : return ptr->ptr.p_bool[i];
7739 : }
7740 :
7741 0 : const ae_bool& alglib::boolean_1d_array::operator[](ae_int_t i) const
7742 : {
7743 0 : return ptr->ptr.p_bool[i];
7744 : }
7745 :
7746 0 : ae_bool& alglib::boolean_1d_array::operator[](ae_int_t i)
7747 : {
7748 0 : return ptr->ptr.p_bool[i];
7749 : }
7750 :
7751 0 : void alglib::boolean_1d_array::setcontent(ae_int_t iLen, const bool *pContent )
7752 : {
7753 : ae_int_t i;
7754 :
7755 : // setlength, with exception-free error handling fallback code
7756 0 : setlength(iLen);
7757 0 : if( ptr==NULL || ptr->cnt!=iLen )
7758 0 : return;
7759 :
7760 : // copy
7761 0 : for(i=0; i<iLen; i++)
7762 0 : ptr->ptr.p_bool[i] = pContent[i];
7763 : }
7764 :
7765 0 : ae_bool* alglib::boolean_1d_array::getcontent()
7766 : {
7767 0 : return ptr->ptr.p_bool;
7768 : }
7769 :
7770 0 : const ae_bool* alglib::boolean_1d_array::getcontent() const
7771 : {
7772 0 : return ptr->ptr.p_bool;
7773 : }
7774 :
7775 : #if !defined(AE_NO_EXCEPTIONS)
7776 0 : alglib::boolean_1d_array::boolean_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_BOOL)
7777 : {
7778 0 : }
7779 :
7780 0 : std::string alglib::boolean_1d_array::tostring() const
7781 : {
7782 0 : if( length()==0 )
7783 0 : return "[]";
7784 0 : return arraytostring(&(operator()(0)), length());
7785 : }
7786 : #endif
7787 :
7788 0 : alglib::integer_1d_array::integer_1d_array():ae_vector_wrapper(alglib_impl::DT_INT)
7789 : {
7790 0 : }
7791 :
7792 0 : alglib::integer_1d_array::integer_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_INT)
7793 : {
7794 0 : }
7795 :
7796 0 : alglib::integer_1d_array::integer_1d_array(const alglib::integer_1d_array &rhs):ae_vector_wrapper(rhs,alglib_impl::DT_INT)
7797 : {
7798 0 : }
7799 :
7800 0 : const alglib::integer_1d_array& alglib::integer_1d_array::operator=(const alglib::integer_1d_array &rhs)
7801 : {
7802 0 : return static_cast<const alglib::integer_1d_array&>(assign(rhs));
7803 : }
7804 :
7805 0 : alglib::integer_1d_array::~integer_1d_array()
7806 : {
7807 0 : }
7808 :
7809 0 : const alglib::ae_int_t& alglib::integer_1d_array::operator()(ae_int_t i) const
7810 : {
7811 0 : return ptr->ptr.p_int[i];
7812 : }
7813 :
7814 0 : alglib::ae_int_t& alglib::integer_1d_array::operator()(ae_int_t i)
7815 : {
7816 0 : return ptr->ptr.p_int[i];
7817 : }
7818 :
7819 0 : const alglib::ae_int_t& alglib::integer_1d_array::operator[](ae_int_t i) const
7820 : {
7821 0 : return ptr->ptr.p_int[i];
7822 : }
7823 :
7824 0 : alglib::ae_int_t& alglib::integer_1d_array::operator[](ae_int_t i)
7825 : {
7826 0 : return ptr->ptr.p_int[i];
7827 : }
7828 :
7829 0 : void alglib::integer_1d_array::setcontent(ae_int_t iLen, const ae_int_t *pContent )
7830 : {
7831 : ae_int_t i;
7832 :
7833 : // setlength(), handle possible exception-free errors
7834 0 : setlength(iLen);
7835 0 : if( ptr==NULL || ptr->cnt!=iLen )
7836 0 : return;
7837 :
7838 : // copy
7839 0 : for(i=0; i<iLen; i++)
7840 0 : ptr->ptr.p_int[i] = pContent[i];
7841 : }
7842 :
7843 0 : alglib::ae_int_t* alglib::integer_1d_array::getcontent()
7844 : {
7845 0 : return ptr->ptr.p_int;
7846 : }
7847 :
7848 0 : const alglib::ae_int_t* alglib::integer_1d_array::getcontent() const
7849 : {
7850 0 : return ptr->ptr.p_int;
7851 : }
7852 :
7853 : #if !defined(AE_NO_EXCEPTIONS)
7854 0 : alglib::integer_1d_array::integer_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_INT)
7855 : {
7856 0 : }
7857 :
7858 0 : std::string alglib::integer_1d_array::tostring() const
7859 : {
7860 0 : if( length()==0 )
7861 0 : return "[]";
7862 0 : return arraytostring(&operator()(0), length());
7863 : }
7864 : #endif
7865 :
7866 0 : alglib::real_1d_array::real_1d_array():ae_vector_wrapper(alglib_impl::DT_REAL)
7867 : {
7868 0 : }
7869 :
7870 0 : alglib::real_1d_array::real_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_REAL)
7871 : {
7872 0 : }
7873 :
7874 0 : alglib::real_1d_array::real_1d_array(const alglib::real_1d_array &rhs):ae_vector_wrapper(rhs,alglib_impl::DT_REAL)
7875 : {
7876 0 : }
7877 :
7878 0 : const alglib::real_1d_array& alglib::real_1d_array::operator=(const alglib::real_1d_array &rhs)
7879 : {
7880 0 : return static_cast<const alglib::real_1d_array&>(assign(rhs));
7881 : }
7882 :
7883 0 : alglib::real_1d_array::~real_1d_array()
7884 : {
7885 0 : }
7886 :
7887 0 : const double& alglib::real_1d_array::operator()(ae_int_t i) const
7888 : {
7889 0 : return ptr->ptr.p_double[i];
7890 : }
7891 :
7892 0 : double& alglib::real_1d_array::operator()(ae_int_t i)
7893 : {
7894 0 : return ptr->ptr.p_double[i];
7895 : }
7896 :
7897 0 : const double& alglib::real_1d_array::operator[](ae_int_t i) const
7898 : {
7899 0 : return ptr->ptr.p_double[i];
7900 : }
7901 :
7902 0 : double& alglib::real_1d_array::operator[](ae_int_t i)
7903 : {
7904 0 : return ptr->ptr.p_double[i];
7905 : }
7906 :
7907 0 : void alglib::real_1d_array::setcontent(ae_int_t iLen, const double *pContent )
7908 : {
7909 : ae_int_t i;
7910 :
7911 : // setlength(), handle possible exception-free errors
7912 0 : setlength(iLen);
7913 0 : if( ptr==NULL || ptr->cnt!=iLen )
7914 0 : return;
7915 :
7916 : // copy
7917 0 : for(i=0; i<iLen; i++)
7918 0 : ptr->ptr.p_double[i] = pContent[i];
7919 : }
7920 :
7921 0 : void alglib::real_1d_array::attach_to_ptr(ae_int_t iLen, double *pContent ) // TODO: convert to constructor!!!!!!!
7922 : {
7923 : alglib_impl::x_vector x;
7924 : jmp_buf _break_jump;
7925 : alglib_impl::ae_state _state;
7926 :
7927 0 : alglib_impl::ae_state_init(&_state);
7928 0 : if( setjmp(_break_jump) )
7929 : {
7930 : #if !defined(AE_NO_EXCEPTIONS)
7931 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7932 : #else
7933 : ptr = NULL;
7934 : is_frozen_proxy = false;
7935 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7936 : return;
7937 : #endif
7938 : }
7939 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7940 0 : alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: unable to attach proxy object to something else", &_state);
7941 0 : alglib_impl::ae_assert(iLen>0, "ALGLIB: non-positive length for attach_to_ptr()", &_state);
7942 0 : x.cnt = iLen;
7943 0 : x.datatype = alglib_impl::DT_REAL;
7944 0 : x.owner = alglib_impl::OWN_CALLER;
7945 0 : x.last_action = alglib_impl::ACT_UNCHANGED;
7946 0 : x.x_ptr.p_ptr = pContent;
7947 0 : attach_to(&x, &_state);
7948 0 : ae_state_clear(&_state);
7949 0 : }
7950 :
7951 0 : double* alglib::real_1d_array::getcontent()
7952 : {
7953 0 : return ptr->ptr.p_double;
7954 : }
7955 :
7956 0 : const double* alglib::real_1d_array::getcontent() const
7957 : {
7958 0 : return ptr->ptr.p_double;
7959 : }
7960 :
7961 : #if !defined(AE_NO_EXCEPTIONS)
7962 0 : alglib::real_1d_array::real_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_REAL)
7963 : {
7964 0 : }
7965 :
7966 0 : std::string alglib::real_1d_array::tostring(int dps) const
7967 : {
7968 0 : if( length()==0 )
7969 0 : return "[]";
7970 0 : return arraytostring(&operator()(0), length(), dps);
7971 : }
7972 : #endif
7973 :
7974 0 : alglib::complex_1d_array::complex_1d_array():ae_vector_wrapper(alglib_impl::DT_COMPLEX)
7975 : {
7976 0 : }
7977 :
7978 0 : alglib::complex_1d_array::complex_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_COMPLEX)
7979 : {
7980 0 : }
7981 :
7982 0 : alglib::complex_1d_array::complex_1d_array(const alglib::complex_1d_array &rhs):ae_vector_wrapper(rhs,alglib_impl::DT_COMPLEX)
7983 : {
7984 0 : }
7985 :
7986 0 : const alglib::complex_1d_array& alglib::complex_1d_array::operator=(const alglib::complex_1d_array &rhs)
7987 : {
7988 0 : return static_cast<const alglib::complex_1d_array&>(assign(rhs));
7989 : }
7990 :
7991 0 : alglib::complex_1d_array::~complex_1d_array()
7992 : {
7993 0 : }
7994 :
7995 0 : const alglib::complex& alglib::complex_1d_array::operator()(ae_int_t i) const
7996 : {
7997 0 : return *((const alglib::complex*)(ptr->ptr.p_complex+i));
7998 : }
7999 :
8000 0 : alglib::complex& alglib::complex_1d_array::operator()(ae_int_t i)
8001 : {
8002 0 : return *((alglib::complex*)(ptr->ptr.p_complex+i));
8003 : }
8004 :
8005 0 : const alglib::complex& alglib::complex_1d_array::operator[](ae_int_t i) const
8006 : {
8007 0 : return *((const alglib::complex*)(ptr->ptr.p_complex+i));
8008 : }
8009 :
8010 0 : alglib::complex& alglib::complex_1d_array::operator[](ae_int_t i)
8011 : {
8012 0 : return *((alglib::complex*)(ptr->ptr.p_complex+i));
8013 : }
8014 :
8015 0 : void alglib::complex_1d_array::setcontent(ae_int_t iLen, const alglib::complex *pContent )
8016 : {
8017 : ae_int_t i;
8018 :
8019 : // setlength(), handle possible exception-free errors
8020 0 : setlength(iLen);
8021 0 : if( ptr==NULL || ptr->cnt!=iLen )
8022 0 : return;
8023 :
8024 : // copy
8025 0 : for(i=0; i<iLen; i++)
8026 : {
8027 0 : ptr->ptr.p_complex[i].x = pContent[i].x;
8028 0 : ptr->ptr.p_complex[i].y = pContent[i].y;
8029 : }
8030 : }
8031 :
8032 0 : alglib::complex* alglib::complex_1d_array::getcontent()
8033 : {
8034 0 : return (alglib::complex*)ptr->ptr.p_complex;
8035 : }
8036 :
8037 0 : const alglib::complex* alglib::complex_1d_array::getcontent() const
8038 : {
8039 0 : return (const alglib::complex*)ptr->ptr.p_complex;
8040 : }
8041 :
8042 : #if !defined(AE_NO_EXCEPTIONS)
8043 0 : alglib::complex_1d_array::complex_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_COMPLEX)
8044 : {
8045 0 : }
8046 :
8047 0 : std::string alglib::complex_1d_array::tostring(int dps) const
8048 : {
8049 0 : if( length()==0 )
8050 0 : return "[]";
8051 0 : return arraytostring(&operator()(0), length(), dps);
8052 : }
8053 : #endif
8054 :
8055 0 : alglib::ae_matrix_wrapper::ae_matrix_wrapper(alglib_impl::ae_matrix *e_ptr, alglib_impl::ae_datatype datatype)
8056 : {
8057 0 : if( e_ptr->datatype!=datatype )
8058 : {
8059 0 : const char *msg = "ALGLIB: ae_vector_wrapper datatype check failed";
8060 : #if !defined(AE_NO_EXCEPTIONS)
8061 0 : _ALGLIB_CPP_EXCEPTION(msg);
8062 : #else
8063 : ptr = NULL;
8064 : is_frozen_proxy = false;
8065 : _ALGLIB_SET_ERROR_FLAG(msg);
8066 : return;
8067 : #endif
8068 : }
8069 0 : ptr = e_ptr;
8070 0 : is_frozen_proxy = true;
8071 0 : }
8072 :
8073 0 : alglib::ae_matrix_wrapper::ae_matrix_wrapper(alglib_impl::ae_datatype datatype)
8074 : {
8075 : jmp_buf _break_jump;
8076 : alglib_impl::ae_state _state;
8077 :
8078 0 : alglib_impl::ae_state_init(&_state);
8079 0 : if( setjmp(_break_jump) )
8080 : {
8081 : #if !defined(AE_NO_EXCEPTIONS)
8082 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8083 : #else
8084 : ptr = NULL;
8085 : is_frozen_proxy = false;
8086 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8087 : return;
8088 : #endif
8089 : }
8090 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8091 0 : ptr = &inner_mat;
8092 0 : is_frozen_proxy = false;
8093 0 : memset(ptr, 0, sizeof(*ptr));
8094 0 : ae_matrix_init(ptr, 0, 0, datatype, &_state, ae_false);
8095 0 : ae_state_clear(&_state);
8096 :
8097 0 : }
8098 :
8099 0 : alglib::ae_matrix_wrapper::ae_matrix_wrapper(const ae_matrix_wrapper &rhs, alglib_impl::ae_datatype datatype)
8100 : {
8101 : jmp_buf _break_jump;
8102 : alglib_impl::ae_state _state;
8103 :
8104 0 : alglib_impl::ae_state_init(&_state);
8105 0 : if( setjmp(_break_jump) )
8106 : {
8107 : #if !defined(AE_NO_EXCEPTIONS)
8108 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8109 : #else
8110 : ptr = NULL;
8111 : is_frozen_proxy = false;
8112 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8113 : return;
8114 : #endif
8115 : }
8116 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8117 0 : is_frozen_proxy = false;
8118 0 : ptr = NULL;
8119 0 : alglib_impl::ae_assert(rhs.ptr->datatype==datatype, "ALGLIB: ae_matrix_wrapper datatype check failed", &_state);
8120 0 : if( rhs.ptr!=NULL )
8121 : {
8122 0 : ptr = &inner_mat;
8123 0 : memset(ptr, 0, sizeof(*ptr));
8124 0 : ae_matrix_init_copy(ptr, rhs.ptr, &_state, ae_false);
8125 : }
8126 0 : ae_state_clear(&_state);
8127 0 : }
8128 :
8129 0 : alglib::ae_matrix_wrapper::~ae_matrix_wrapper()
8130 : {
8131 0 : if( ptr==&inner_mat )
8132 0 : ae_matrix_clear(ptr);
8133 0 : }
8134 :
8135 : #if !defined(AE_NO_EXCEPTIONS)
8136 0 : alglib::ae_matrix_wrapper::ae_matrix_wrapper(const char *s, alglib_impl::ae_datatype datatype)
8137 : {
8138 0 : std::vector< std::vector<const char*> > smat;
8139 : size_t i, j;
8140 0 : char *p = filter_spaces(s);
8141 0 : if( p==NULL )
8142 0 : _ALGLIB_CPP_EXCEPTION("ALGLIB: allocation error");
8143 : try
8144 : {
8145 0 : str_matrix_create(p, &smat);
8146 : {
8147 : jmp_buf _break_jump;
8148 : alglib_impl::ae_state _state;
8149 0 : alglib_impl::ae_state_init(&_state);
8150 0 : if( setjmp(_break_jump) )
8151 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8152 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8153 0 : ptr = &inner_mat;
8154 0 : is_frozen_proxy = false;
8155 0 : memset(ptr, 0, sizeof(*ptr));
8156 0 : if( smat.size()!=0 )
8157 0 : ae_matrix_init(ptr, (ae_int_t)(smat.size()), (ae_int_t)(smat[0].size()), datatype, &_state, ae_false);
8158 : else
8159 0 : ae_matrix_init(ptr, 0, 0, datatype, &_state, ae_false);
8160 0 : ae_state_clear(&_state);
8161 : }
8162 0 : for(i=0; i<smat.size(); i++)
8163 0 : for(j=0; j<smat[0].size(); j++)
8164 : {
8165 0 : if( datatype==alglib_impl::DT_BOOL )
8166 0 : ptr->ptr.pp_bool[i][j] = parse_bool_delim(smat[i][j],",]");
8167 0 : if( datatype==alglib_impl::DT_INT )
8168 0 : ptr->ptr.pp_int[i][j] = parse_int_delim(smat[i][j],",]");
8169 0 : if( datatype==alglib_impl::DT_REAL )
8170 0 : ptr->ptr.pp_double[i][j] = parse_real_delim(smat[i][j],",]");
8171 0 : if( datatype==alglib_impl::DT_COMPLEX )
8172 : {
8173 0 : alglib::complex t = parse_complex_delim(smat[i][j],",]");
8174 0 : ptr->ptr.pp_complex[i][j].x = t.x;
8175 0 : ptr->ptr.pp_complex[i][j].y = t.y;
8176 : }
8177 : }
8178 0 : alglib_impl::ae_free(p);
8179 : }
8180 0 : catch(...)
8181 : {
8182 0 : alglib_impl::ae_free(p);
8183 0 : throw;
8184 0 : }
8185 0 : }
8186 : #endif
8187 :
8188 0 : void alglib::ae_matrix_wrapper::setlength(ae_int_t rows, ae_int_t cols) // TODO: automatic allocation of NULL ptr!!!!!
8189 : {
8190 : jmp_buf _break_jump;
8191 : alglib_impl::ae_state _state;
8192 0 : alglib_impl::ae_state_init(&_state);
8193 0 : if( setjmp(_break_jump) )
8194 : {
8195 : #if !defined(AE_NO_EXCEPTIONS)
8196 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8197 : #else
8198 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8199 : return;
8200 : #endif
8201 : }
8202 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8203 0 : alglib_impl::ae_assert(ptr!=NULL, "ALGLIB: setlength() error, p_mat==NULL (array was not correctly initialized)", &_state);
8204 0 : alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: setlength() error, attempt to resize proxy array", &_state);
8205 0 : alglib_impl::ae_matrix_set_length(ptr, rows, cols, &_state);
8206 0 : alglib_impl::ae_state_clear(&_state);
8207 0 : }
8208 :
8209 0 : alglib::ae_int_t alglib::ae_matrix_wrapper::rows() const
8210 : {
8211 0 : if( ptr==NULL )
8212 0 : return 0;
8213 0 : return ptr->rows;
8214 : }
8215 :
8216 0 : alglib::ae_int_t alglib::ae_matrix_wrapper::cols() const
8217 : {
8218 0 : if( ptr==NULL )
8219 0 : return 0;
8220 0 : return ptr->cols;
8221 : }
8222 :
8223 0 : bool alglib::ae_matrix_wrapper::isempty() const
8224 : {
8225 0 : return rows()==0 || cols()==0;
8226 : }
8227 :
8228 0 : alglib::ae_int_t alglib::ae_matrix_wrapper::getstride() const
8229 : {
8230 0 : if( ptr==NULL )
8231 0 : return 0;
8232 0 : return ptr->stride;
8233 : }
8234 :
8235 0 : void alglib::ae_matrix_wrapper::attach_to(alglib_impl::x_matrix *new_ptr, alglib_impl::ae_state *_state)
8236 : {
8237 0 : if( ptr==&inner_mat )
8238 0 : ae_matrix_clear(ptr);
8239 0 : ptr = &inner_mat;
8240 0 : memset(ptr, 0, sizeof(*ptr));
8241 0 : ae_matrix_init_attach_to_x(ptr, new_ptr, _state, ae_false);
8242 0 : is_frozen_proxy = true;
8243 0 : }
8244 :
8245 0 : const alglib::ae_matrix_wrapper& alglib::ae_matrix_wrapper::assign(const alglib::ae_matrix_wrapper &rhs)
8246 : {
8247 : ae_int_t i;
8248 : jmp_buf _break_jump;
8249 : alglib_impl::ae_state _state;
8250 0 : if( this==&rhs )
8251 0 : return *this;
8252 0 : alglib_impl::ae_state_init(&_state);
8253 0 : if( setjmp(_break_jump) )
8254 : {
8255 : #if !defined(AE_NO_EXCEPTIONS)
8256 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8257 : #else
8258 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8259 : return *this;
8260 : #endif
8261 : }
8262 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8263 0 : ae_assert(ptr!=NULL, "ALGLIB: incorrect assignment to matrix (uninitialized destination)", &_state);
8264 0 : ae_assert(rhs.ptr!=NULL, "ALGLIB: incorrect assignment to array (uninitialized source)", &_state);
8265 0 : ae_assert(rhs.ptr->datatype==ptr->datatype, "ALGLIB: incorrect assignment to array (types dont match)", &_state);
8266 0 : if( is_frozen_proxy )
8267 : {
8268 0 : ae_assert(rhs.ptr->rows==ptr->rows, "ALGLIB: incorrect assignment to proxy array (sizes dont match)", &_state);
8269 0 : ae_assert(rhs.ptr->cols==ptr->cols, "ALGLIB: incorrect assignment to proxy array (sizes dont match)", &_state);
8270 : }
8271 0 : if( (rhs.ptr->rows!=ptr->rows) || (rhs.ptr->cols!=ptr->cols) )
8272 0 : ae_matrix_set_length(ptr, rhs.ptr->rows, rhs.ptr->cols, &_state);
8273 0 : for(i=0; i<ptr->rows; i++)
8274 0 : memcpy(ptr->ptr.pp_void[i], rhs.ptr->ptr.pp_void[i], ptr->cols*alglib_impl::ae_sizeof(ptr->datatype));
8275 0 : alglib_impl::ae_state_clear(&_state);
8276 0 : return *this;
8277 : }
8278 :
8279 0 : const alglib_impl::ae_matrix* alglib::ae_matrix_wrapper::c_ptr() const
8280 : {
8281 0 : return ptr;
8282 : }
8283 :
8284 0 : alglib_impl::ae_matrix* alglib::ae_matrix_wrapper::c_ptr()
8285 : {
8286 0 : return ptr;
8287 : }
8288 :
8289 0 : alglib::boolean_2d_array::boolean_2d_array():ae_matrix_wrapper(alglib_impl::DT_BOOL)
8290 : {
8291 0 : }
8292 :
8293 0 : alglib::boolean_2d_array::boolean_2d_array(const alglib::boolean_2d_array &rhs):ae_matrix_wrapper(rhs,alglib_impl::DT_BOOL)
8294 : {
8295 0 : }
8296 :
8297 0 : alglib::boolean_2d_array::boolean_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_BOOL)
8298 : {
8299 0 : }
8300 :
8301 0 : alglib::boolean_2d_array::~boolean_2d_array()
8302 : {
8303 0 : }
8304 :
8305 0 : const alglib::boolean_2d_array& alglib::boolean_2d_array::operator=(const alglib::boolean_2d_array &rhs)
8306 : {
8307 0 : return static_cast<const boolean_2d_array&>(assign(rhs));
8308 : }
8309 :
8310 0 : const ae_bool& alglib::boolean_2d_array::operator()(ae_int_t i, ae_int_t j) const
8311 : {
8312 0 : return ptr->ptr.pp_bool[i][j];
8313 : }
8314 :
8315 0 : ae_bool& alglib::boolean_2d_array::operator()(ae_int_t i, ae_int_t j)
8316 : {
8317 0 : return ptr->ptr.pp_bool[i][j];
8318 : }
8319 :
8320 0 : const ae_bool* alglib::boolean_2d_array::operator[](ae_int_t i) const
8321 : {
8322 0 : return ptr->ptr.pp_bool[i];
8323 : }
8324 :
8325 0 : ae_bool* alglib::boolean_2d_array::operator[](ae_int_t i)
8326 : {
8327 0 : return ptr->ptr.pp_bool[i];
8328 : }
8329 :
8330 0 : void alglib::boolean_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const bool *pContent )
8331 : {
8332 : ae_int_t i, j;
8333 :
8334 : // setlength(), handle possible exception-free errors
8335 0 : setlength(irows, icols);
8336 0 : if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
8337 0 : return;
8338 :
8339 : // copy
8340 0 : for(i=0; i<irows; i++)
8341 0 : for(j=0; j<icols; j++)
8342 0 : ptr->ptr.pp_bool[i][j] = pContent[i*icols+j];
8343 : }
8344 :
8345 : #if !defined(AE_NO_EXCEPTIONS)
8346 0 : alglib::boolean_2d_array::boolean_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_BOOL)
8347 : {
8348 0 : }
8349 :
8350 0 : std::string alglib::boolean_2d_array::tostring() const
8351 : {
8352 0 : std::string result;
8353 : ae_int_t i;
8354 0 : if( isempty() )
8355 0 : return "[[]]";
8356 0 : result = "[";
8357 0 : for(i=0; i<rows(); i++)
8358 : {
8359 0 : if( i!=0 )
8360 0 : result += ",";
8361 0 : result += arraytostring(&operator()(i,0), cols());
8362 : }
8363 0 : result += "]";
8364 0 : return result;
8365 0 : }
8366 : #endif
8367 :
8368 0 : alglib::integer_2d_array::integer_2d_array():ae_matrix_wrapper(alglib_impl::DT_INT)
8369 : {
8370 0 : }
8371 :
8372 0 : alglib::integer_2d_array::integer_2d_array(const alglib::integer_2d_array &rhs):ae_matrix_wrapper(rhs,alglib_impl::DT_INT)
8373 : {
8374 0 : }
8375 :
8376 0 : alglib::integer_2d_array::integer_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_INT)
8377 : {
8378 0 : }
8379 :
8380 0 : alglib::integer_2d_array::~integer_2d_array()
8381 : {
8382 0 : }
8383 :
8384 0 : const alglib::integer_2d_array& alglib::integer_2d_array::operator=(const alglib::integer_2d_array &rhs)
8385 : {
8386 0 : return static_cast<const integer_2d_array&>(assign(rhs));
8387 : }
8388 :
8389 0 : const alglib::ae_int_t& alglib::integer_2d_array::operator()(ae_int_t i, ae_int_t j) const
8390 : {
8391 0 : return ptr->ptr.pp_int[i][j];
8392 : }
8393 :
8394 0 : alglib::ae_int_t& alglib::integer_2d_array::operator()(ae_int_t i, ae_int_t j)
8395 : {
8396 0 : return ptr->ptr.pp_int[i][j];
8397 : }
8398 :
8399 0 : const alglib::ae_int_t* alglib::integer_2d_array::operator[](ae_int_t i) const
8400 : {
8401 0 : return ptr->ptr.pp_int[i];
8402 : }
8403 :
8404 0 : alglib::ae_int_t* alglib::integer_2d_array::operator[](ae_int_t i)
8405 : {
8406 0 : return ptr->ptr.pp_int[i];
8407 : }
8408 :
8409 0 : void alglib::integer_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const ae_int_t *pContent )
8410 : {
8411 : ae_int_t i, j;
8412 :
8413 : // setlength(), handle possible exception-free errors
8414 0 : setlength(irows, icols);
8415 0 : if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
8416 0 : return;
8417 :
8418 : // copy
8419 0 : for(i=0; i<irows; i++)
8420 0 : for(j=0; j<icols; j++)
8421 0 : ptr->ptr.pp_int[i][j] = pContent[i*icols+j];
8422 : }
8423 :
8424 : #if !defined(AE_NO_EXCEPTIONS)
8425 0 : alglib::integer_2d_array::integer_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_INT)
8426 : {
8427 0 : }
8428 :
8429 0 : std::string alglib::integer_2d_array::tostring() const
8430 : {
8431 0 : std::string result;
8432 : ae_int_t i;
8433 0 : if( isempty() )
8434 0 : return "[[]]";
8435 0 : result = "[";
8436 0 : for(i=0; i<rows(); i++)
8437 : {
8438 0 : if( i!=0 )
8439 0 : result += ",";
8440 0 : result += arraytostring(&operator()(i,0), cols());
8441 : }
8442 0 : result += "]";
8443 0 : return result;
8444 0 : }
8445 : #endif
8446 :
8447 0 : alglib::real_2d_array::real_2d_array():ae_matrix_wrapper(alglib_impl::DT_REAL)
8448 : {
8449 0 : }
8450 :
8451 0 : alglib::real_2d_array::real_2d_array(const alglib::real_2d_array &rhs):ae_matrix_wrapper(rhs,alglib_impl::DT_REAL)
8452 : {
8453 0 : }
8454 :
8455 0 : alglib::real_2d_array::real_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_REAL)
8456 : {
8457 0 : }
8458 :
8459 0 : alglib::real_2d_array::~real_2d_array()
8460 : {
8461 0 : }
8462 :
8463 0 : const alglib::real_2d_array& alglib::real_2d_array::operator=(const alglib::real_2d_array &rhs)
8464 : {
8465 0 : return static_cast<const real_2d_array&>(assign(rhs));
8466 : }
8467 :
8468 0 : const double& alglib::real_2d_array::operator()(ae_int_t i, ae_int_t j) const
8469 : {
8470 0 : return ptr->ptr.pp_double[i][j];
8471 : }
8472 :
8473 0 : double& alglib::real_2d_array::operator()(ae_int_t i, ae_int_t j)
8474 : {
8475 0 : return ptr->ptr.pp_double[i][j];
8476 : }
8477 :
8478 0 : const double* alglib::real_2d_array::operator[](ae_int_t i) const
8479 : {
8480 0 : return ptr->ptr.pp_double[i];
8481 : }
8482 :
8483 0 : double* alglib::real_2d_array::operator[](ae_int_t i)
8484 : {
8485 0 : return ptr->ptr.pp_double[i];
8486 : }
8487 :
8488 0 : void alglib::real_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const double *pContent )
8489 : {
8490 : ae_int_t i, j;
8491 :
8492 : // setlength(), handle possible exception-free errors
8493 0 : setlength(irows, icols);
8494 0 : if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
8495 0 : return;
8496 :
8497 : // copy
8498 0 : for(i=0; i<irows; i++)
8499 0 : for(j=0; j<icols; j++)
8500 0 : ptr->ptr.pp_double[i][j] = pContent[i*icols+j];
8501 : }
8502 :
8503 0 : void alglib::real_2d_array::attach_to_ptr(ae_int_t irows, ae_int_t icols, double *pContent )
8504 : {
8505 : jmp_buf _break_jump;
8506 : alglib_impl::ae_state _state;
8507 : alglib_impl::x_matrix x;
8508 0 : alglib_impl::ae_state_init(&_state);
8509 0 : if( setjmp(_break_jump) )
8510 : {
8511 : #if !defined(AE_NO_EXCEPTIONS)
8512 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8513 : #else
8514 : ptr = NULL;
8515 : is_frozen_proxy = false;
8516 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8517 : return;
8518 : #endif
8519 : }
8520 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8521 0 : alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: unable to attach proxy object to something else", &_state);
8522 0 : alglib_impl::ae_assert(irows>0&&icols>0, "ALGLIB: non-positive length for attach_to_ptr()", &_state);
8523 0 : x.rows = irows;
8524 0 : x.cols = icols;
8525 0 : x.stride = icols;
8526 0 : x.datatype = alglib_impl::DT_REAL;
8527 0 : x.owner = alglib_impl::OWN_CALLER;
8528 0 : x.last_action = alglib_impl::ACT_UNCHANGED;
8529 0 : x.x_ptr.p_ptr = pContent;
8530 0 : attach_to(&x, &_state);
8531 0 : ae_state_clear(&_state);
8532 0 : }
8533 :
8534 : #if !defined(AE_NO_EXCEPTIONS)
8535 0 : alglib::real_2d_array::real_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_REAL)
8536 : {
8537 0 : }
8538 :
8539 0 : std::string alglib::real_2d_array::tostring(int dps) const
8540 : {
8541 0 : std::string result;
8542 : ae_int_t i;
8543 0 : if( isempty() )
8544 0 : return "[[]]";
8545 0 : result = "[";
8546 0 : for(i=0; i<rows(); i++)
8547 : {
8548 0 : if( i!=0 )
8549 0 : result += ",";
8550 0 : result += arraytostring(&operator()(i,0), cols(), dps);
8551 : }
8552 0 : result += "]";
8553 0 : return result;
8554 0 : }
8555 : #endif
8556 :
8557 0 : alglib::complex_2d_array::complex_2d_array():ae_matrix_wrapper(alglib_impl::DT_COMPLEX)
8558 : {
8559 0 : }
8560 :
8561 0 : alglib::complex_2d_array::complex_2d_array(const alglib::complex_2d_array &rhs):ae_matrix_wrapper(rhs,alglib_impl::DT_COMPLEX)
8562 : {
8563 0 : }
8564 :
8565 0 : alglib::complex_2d_array::complex_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_COMPLEX)
8566 : {
8567 0 : }
8568 :
8569 0 : alglib::complex_2d_array::~complex_2d_array()
8570 : {
8571 0 : }
8572 :
8573 0 : const alglib::complex_2d_array& alglib::complex_2d_array::operator=(const alglib::complex_2d_array &rhs)
8574 : {
8575 0 : return static_cast<const complex_2d_array&>(assign(rhs));
8576 : }
8577 :
8578 0 : const alglib::complex& alglib::complex_2d_array::operator()(ae_int_t i, ae_int_t j) const
8579 : {
8580 0 : return *((const alglib::complex*)(ptr->ptr.pp_complex[i]+j));
8581 : }
8582 :
8583 0 : alglib::complex& alglib::complex_2d_array::operator()(ae_int_t i, ae_int_t j)
8584 : {
8585 0 : return *((alglib::complex*)(ptr->ptr.pp_complex[i]+j));
8586 : }
8587 :
8588 0 : const alglib::complex* alglib::complex_2d_array::operator[](ae_int_t i) const
8589 : {
8590 0 : return (const alglib::complex*)(ptr->ptr.pp_complex[i]);
8591 : }
8592 :
8593 0 : alglib::complex* alglib::complex_2d_array::operator[](ae_int_t i)
8594 : {
8595 0 : return (alglib::complex*)(ptr->ptr.pp_complex[i]);
8596 : }
8597 :
8598 0 : void alglib::complex_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const alglib::complex *pContent )
8599 : {
8600 : ae_int_t i, j;
8601 :
8602 : // setlength(), handle possible exception-free errors
8603 0 : setlength(irows, icols);
8604 0 : if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
8605 0 : return;
8606 :
8607 : // copy
8608 0 : for(i=0; i<irows; i++)
8609 0 : for(j=0; j<icols; j++)
8610 : {
8611 0 : ptr->ptr.pp_complex[i][j].x = pContent[i*icols+j].x;
8612 0 : ptr->ptr.pp_complex[i][j].y = pContent[i*icols+j].y;
8613 : }
8614 : }
8615 :
8616 : #if !defined(AE_NO_EXCEPTIONS)
8617 0 : alglib::complex_2d_array::complex_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_COMPLEX)
8618 : {
8619 0 : }
8620 :
8621 0 : std::string alglib::complex_2d_array::tostring(int dps) const
8622 : {
8623 0 : std::string result;
8624 : ae_int_t i;
8625 0 : if( isempty() )
8626 0 : return "[[]]";
8627 0 : result = "[";
8628 0 : for(i=0; i<rows(); i++)
8629 : {
8630 0 : if( i!=0 )
8631 0 : result += ",";
8632 0 : result += arraytostring(&operator()(i,0), cols(), dps);
8633 : }
8634 0 : result += "]";
8635 0 : return result;
8636 0 : }
8637 : #endif
8638 :
8639 : /********************************************************************
8640 : Internal functions
8641 : ********************************************************************/
8642 8 : double alglib::get_aenv_nan()
8643 : {
8644 : double r;
8645 : alglib_impl::ae_state _alglib_env_state;
8646 8 : alglib_impl::ae_state_init(&_alglib_env_state);
8647 8 : r = _alglib_env_state.v_nan;
8648 8 : alglib_impl::ae_state_clear(&_alglib_env_state);
8649 8 : return r;
8650 : }
8651 :
8652 8 : double alglib::get_aenv_posinf()
8653 : {
8654 : double r;
8655 : alglib_impl::ae_state _alglib_env_state;
8656 8 : alglib_impl::ae_state_init(&_alglib_env_state);
8657 8 : r = _alglib_env_state.v_posinf;
8658 8 : alglib_impl::ae_state_clear(&_alglib_env_state);
8659 8 : return r;
8660 : }
8661 :
8662 8 : double alglib::get_aenv_neginf()
8663 : {
8664 : double r;
8665 : alglib_impl::ae_state _alglib_env_state;
8666 8 : alglib_impl::ae_state_init(&_alglib_env_state);
8667 8 : r = _alglib_env_state.v_neginf;
8668 8 : alglib_impl::ae_state_clear(&_alglib_env_state);
8669 8 : return r;
8670 : }
8671 :
8672 0 : alglib::ae_int_t alglib::my_stricmp(const char *s1, const char *s2)
8673 : {
8674 : int c1, c2;
8675 :
8676 : //
8677 : // handle special cases
8678 : //
8679 0 : if(s1==NULL && s2!=NULL)
8680 0 : return -1;
8681 0 : if(s1!=NULL && s2==NULL)
8682 0 : return +1;
8683 0 : if(s1==NULL && s2==NULL)
8684 0 : return 0;
8685 :
8686 : //
8687 : // compare
8688 : //
8689 : for (;;)
8690 : {
8691 0 : c1 = *s1;
8692 0 : c2 = *s2;
8693 0 : s1++;
8694 0 : s2++;
8695 0 : if( c1==0 )
8696 0 : return c2==0 ? 0 : -1;
8697 0 : if( c2==0 )
8698 0 : return c1==0 ? 0 : +1;
8699 0 : c1 = tolower(c1);
8700 0 : c2 = tolower(c2);
8701 0 : if( c1<c2 )
8702 0 : return -1;
8703 0 : if( c1>c2 )
8704 0 : return +1;
8705 : }
8706 : }
8707 :
8708 : #if !defined(AE_NO_EXCEPTIONS)
8709 : //
8710 : // This function filters out all spaces from the string.
8711 : // It returns string allocated with ae_malloc().
8712 : // On allocaction failure returns NULL.
8713 : //
8714 0 : char* alglib::filter_spaces(const char *s)
8715 : {
8716 : size_t i, n;
8717 : char *r;
8718 : char *r0;
8719 0 : n = strlen(s);
8720 0 : r = (char*)alglib_impl::ae_malloc(n+1,NULL);
8721 0 : if( r==NULL )
8722 0 : return r;
8723 0 : for(i=0,r0=r; i<=n; i++,s++)
8724 0 : if( !isspace(*s) )
8725 : {
8726 0 : *r0 = *s;
8727 0 : r0++;
8728 : }
8729 0 : return r;
8730 : }
8731 :
8732 0 : void alglib::str_vector_create(const char *src, bool match_head_only, std::vector<const char*> *p_vec)
8733 : {
8734 : //
8735 : // parse beginning of the string.
8736 : // try to handle "[]" string
8737 : //
8738 0 : p_vec->clear();
8739 0 : if( *src!='[' )
8740 0 : _ALGLIB_CPP_EXCEPTION("Incorrect initializer for vector");
8741 0 : src++;
8742 0 : if( *src==']' )
8743 0 : return;
8744 0 : p_vec->push_back(src);
8745 : for(;;)
8746 : {
8747 0 : if( *src==0 )
8748 0 : _ALGLIB_CPP_EXCEPTION("Incorrect initializer for vector");
8749 0 : if( *src==']' )
8750 : {
8751 0 : if( src[1]==0 || !match_head_only)
8752 0 : return;
8753 0 : _ALGLIB_CPP_EXCEPTION("Incorrect initializer for vector");
8754 : }
8755 0 : if( *src==',' )
8756 : {
8757 0 : p_vec->push_back(src+1);
8758 0 : src++;
8759 0 : continue;
8760 : }
8761 0 : src++;
8762 : }
8763 : }
8764 :
8765 0 : void alglib::str_matrix_create(const char *src, std::vector< std::vector<const char*> > *p_mat)
8766 : {
8767 0 : p_mat->clear();
8768 :
8769 : //
8770 : // Try to handle "[[]]" string
8771 : //
8772 0 : if( strcmp(src, "[[]]")==0 )
8773 0 : return;
8774 :
8775 : //
8776 : // Parse non-empty string
8777 : //
8778 0 : if( *src!='[' )
8779 0 : _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
8780 0 : src++;
8781 : for(;;)
8782 : {
8783 0 : p_mat->push_back(std::vector<const char*>());
8784 0 : str_vector_create(src, false, &p_mat->back());
8785 0 : if( p_mat->back().size()==0 || p_mat->back().size()!=(*p_mat)[0].size() )
8786 0 : _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
8787 0 : src = strchr(src, ']');
8788 0 : if( src==NULL )
8789 0 : _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
8790 0 : src++;
8791 0 : if( *src==',' )
8792 : {
8793 0 : src++;
8794 0 : continue;
8795 : }
8796 0 : if( *src==']' )
8797 0 : break;
8798 0 : _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
8799 : }
8800 0 : src++;
8801 0 : if( *src!=0 )
8802 0 : _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
8803 : }
8804 :
8805 0 : ae_bool alglib::parse_bool_delim(const char *s, const char *delim)
8806 : {
8807 : const char *p;
8808 : char buf[8];
8809 :
8810 : // try to parse false
8811 0 : p = "false";
8812 0 : memset(buf, 0, sizeof(buf));
8813 0 : strncpy(buf, s, strlen(p));
8814 0 : if( my_stricmp(buf, p)==0 )
8815 : {
8816 0 : if( s[strlen(p)]==0 || strchr(delim,s[strlen(p)])==NULL )
8817 0 : _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8818 0 : return ae_false;
8819 : }
8820 :
8821 : // try to parse true
8822 0 : p = "true";
8823 0 : memset(buf, 0, sizeof(buf));
8824 0 : strncpy(buf, s, strlen(p));
8825 0 : if( my_stricmp(buf, p)==0 )
8826 : {
8827 0 : if( s[strlen(p)]==0 || strchr(delim,s[strlen(p)])==NULL )
8828 0 : _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8829 0 : return ae_true;
8830 : }
8831 :
8832 : // error
8833 0 : _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8834 : }
8835 :
8836 0 : alglib::ae_int_t alglib::parse_int_delim(const char *s, const char *delim)
8837 : {
8838 : const char *p;
8839 : long long_val;
8840 : volatile ae_int_t ae_val;
8841 :
8842 0 : p = s;
8843 :
8844 : //
8845 : // check string structure:
8846 : // * leading sign
8847 : // * at least one digit
8848 : // * delimiter
8849 : //
8850 0 : if( *s=='-' || *s=='+' )
8851 0 : s++;
8852 0 : if( *s==0 || strchr("1234567890",*s)==NULL)
8853 0 : _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8854 0 : while( *s!=0 && strchr("1234567890",*s)!=NULL )
8855 0 : s++;
8856 0 : if( *s==0 || strchr(delim,*s)==NULL )
8857 0 : _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8858 :
8859 : // convert and ensure that value fits into ae_int_t
8860 0 : s = p;
8861 0 : long_val = atol(s);
8862 0 : ae_val = long_val;
8863 0 : if( ae_val!=long_val )
8864 0 : _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8865 0 : return ae_val;
8866 : }
8867 :
8868 0 : bool alglib::_parse_real_delim(const char *s, const char *delim, double *result, const char **new_s)
8869 : {
8870 : const char *p;
8871 : char *t;
8872 : bool has_digits;
8873 : char buf[64];
8874 : int isign;
8875 : lconv *loc;
8876 :
8877 0 : p = s;
8878 :
8879 : //
8880 : // check string structure and decide what to do
8881 : //
8882 0 : isign = 1;
8883 0 : if( *s=='-' || *s=='+' )
8884 : {
8885 0 : isign = *s=='-' ? -1 : +1;
8886 0 : s++;
8887 : }
8888 0 : memset(buf, 0, sizeof(buf));
8889 0 : strncpy(buf, s, 3);
8890 0 : if( my_stricmp(buf,"nan")!=0 && my_stricmp(buf,"inf")!=0 )
8891 : {
8892 : //
8893 : // [sign] [ddd] [.] [ddd] [e|E[sign]ddd]
8894 : //
8895 0 : has_digits = false;
8896 0 : if( *s!=0 && strchr("1234567890",*s)!=NULL )
8897 : {
8898 0 : has_digits = true;
8899 0 : while( *s!=0 && strchr("1234567890",*s)!=NULL )
8900 0 : s++;
8901 : }
8902 0 : if( *s=='.' )
8903 0 : s++;
8904 0 : if( *s!=0 && strchr("1234567890",*s)!=NULL )
8905 : {
8906 0 : has_digits = true;
8907 0 : while( *s!=0 && strchr("1234567890",*s)!=NULL )
8908 0 : s++;
8909 : }
8910 0 : if (!has_digits )
8911 0 : return false;
8912 0 : if( *s=='e' || *s=='E' )
8913 : {
8914 0 : s++;
8915 0 : if( *s=='-' || *s=='+' )
8916 0 : s++;
8917 0 : if( *s==0 || strchr("1234567890",*s)==NULL )
8918 0 : return false;
8919 0 : while( *s!=0 && strchr("1234567890",*s)!=NULL )
8920 0 : s++;
8921 : }
8922 0 : if( *s==0 || strchr(delim,*s)==NULL )
8923 0 : return false;
8924 0 : *new_s = s;
8925 :
8926 : //
8927 : // finite value conversion
8928 : //
8929 0 : if( *new_s-p>=(int)sizeof(buf) )
8930 0 : return false;
8931 0 : strncpy(buf, p, (size_t)(*new_s-p));
8932 0 : buf[*new_s-p] = 0;
8933 0 : loc = localeconv();
8934 0 : t = strchr(buf,'.');
8935 0 : if( t!=NULL )
8936 0 : *t = *loc->decimal_point;
8937 0 : *result = atof(buf);
8938 0 : return true;
8939 : }
8940 : else
8941 : {
8942 : //
8943 : // check delimiter and update *new_s
8944 : //
8945 0 : s += 3;
8946 0 : if( *s==0 || strchr(delim,*s)==NULL )
8947 0 : return false;
8948 0 : *new_s = s;
8949 :
8950 : //
8951 : // NAN, INF conversion
8952 : //
8953 0 : if( my_stricmp(buf,"nan")==0 )
8954 0 : *result = fp_nan;
8955 0 : if( my_stricmp(buf,"inf")==0 )
8956 0 : *result = isign>0 ? fp_posinf : fp_neginf;
8957 0 : return true;
8958 : }
8959 : }
8960 :
8961 0 : double alglib::parse_real_delim(const char *s, const char *delim)
8962 : {
8963 : double result;
8964 : const char *new_s;
8965 0 : if( !_parse_real_delim(s, delim, &result, &new_s) )
8966 0 : _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8967 0 : return result;
8968 : }
8969 :
8970 0 : alglib::complex alglib::parse_complex_delim(const char *s, const char *delim)
8971 : {
8972 : double d_result;
8973 : const char *new_s;
8974 0 : alglib::complex c_result;
8975 :
8976 : // parse as real value
8977 0 : if( _parse_real_delim(s, delim, &d_result, &new_s) )
8978 0 : return d_result;
8979 :
8980 : // parse as "a+bi" or "a-bi"
8981 0 : if( _parse_real_delim(s, "+-", &c_result.x, &new_s) )
8982 : {
8983 0 : s = new_s;
8984 0 : if( !_parse_real_delim(s, "i", &c_result.y, &new_s) )
8985 0 : _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8986 0 : s = new_s+1;
8987 0 : if( *s==0 || strchr(delim,*s)==NULL )
8988 0 : _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8989 0 : return c_result;
8990 : }
8991 :
8992 : // parse as complex value "bi+a" or "bi-a"
8993 0 : if( _parse_real_delim(s, "i", &c_result.y, &new_s) )
8994 : {
8995 0 : s = new_s+1;
8996 0 : if( *s==0 )
8997 0 : _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8998 0 : if( strchr(delim,*s)!=NULL )
8999 : {
9000 0 : c_result.x = 0;
9001 0 : return c_result;
9002 : }
9003 0 : if( strchr("+-",*s)!=NULL )
9004 : {
9005 0 : if( !_parse_real_delim(s, delim, &c_result.x, &new_s) )
9006 0 : _ALGLIB_CPP_EXCEPTION("Cannot parse value");
9007 0 : return c_result;
9008 : }
9009 0 : _ALGLIB_CPP_EXCEPTION("Cannot parse value");
9010 : }
9011 :
9012 : // error
9013 0 : _ALGLIB_CPP_EXCEPTION("Cannot parse value");
9014 : }
9015 :
9016 0 : std::string alglib::arraytostring(const bool *ptr, ae_int_t n)
9017 : {
9018 0 : std::string result;
9019 : ae_int_t i;
9020 0 : result = "[";
9021 0 : for(i=0; i<n; i++)
9022 : {
9023 0 : if( i!=0 )
9024 0 : result += ",";
9025 0 : result += ptr[i] ? "true" : "false";
9026 : }
9027 0 : result += "]";
9028 0 : return result;
9029 0 : }
9030 :
9031 0 : std::string alglib::arraytostring(const ae_int_t *ptr, ae_int_t n)
9032 : {
9033 0 : std::string result;
9034 : ae_int_t i;
9035 : char buf[64];
9036 0 : result = "[";
9037 0 : for(i=0; i<n; i++)
9038 : {
9039 0 : if( sprintf(buf, i==0 ? "%ld" : ",%ld", long(ptr[i]))>=(int)sizeof(buf) )
9040 0 : _ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
9041 0 : result += buf;
9042 : }
9043 0 : result += "]";
9044 0 : return result;
9045 0 : }
9046 :
9047 0 : std::string alglib::arraytostring(const double *ptr, ae_int_t n, int _dps)
9048 : {
9049 0 : std::string result;
9050 : ae_int_t i;
9051 : char buf[64];
9052 : char mask1[64];
9053 : char mask2[64];
9054 0 : int dps = _dps>=0 ? _dps : -_dps;
9055 0 : result = "[";
9056 0 : if( sprintf(mask1, "%%.%d%s", dps, _dps>=0 ? "f" : "e")>=(int)sizeof(mask1) )
9057 0 : _ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
9058 0 : if( sprintf(mask2, ",%s", mask1)>=(int)sizeof(mask2) )
9059 0 : _ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
9060 0 : for(i=0; i<n; i++)
9061 : {
9062 0 : buf[0] = 0;
9063 0 : if( fp_isfinite(ptr[i]) )
9064 : {
9065 0 : if( sprintf(buf, i==0 ? mask1 : mask2, double(ptr[i]))>=(int)sizeof(buf) )
9066 0 : _ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
9067 : }
9068 0 : else if( fp_isnan(ptr[i]) )
9069 0 : strcpy(buf, i==0 ? "NAN" : ",NAN");
9070 0 : else if( fp_isposinf(ptr[i]) )
9071 0 : strcpy(buf, i==0 ? "+INF" : ",+INF");
9072 0 : else if( fp_isneginf(ptr[i]) )
9073 0 : strcpy(buf, i==0 ? "-INF" : ",-INF");
9074 0 : result += buf;
9075 : }
9076 0 : result += "]";
9077 0 : return result;
9078 0 : }
9079 :
9080 0 : std::string alglib::arraytostring(const alglib::complex *ptr, ae_int_t n, int dps)
9081 : {
9082 0 : std::string result;
9083 : ae_int_t i;
9084 0 : result = "[";
9085 0 : for(i=0; i<n; i++)
9086 : {
9087 0 : if( i!=0 )
9088 0 : result += ",";
9089 0 : result += ptr[i].tostring(dps);
9090 : }
9091 0 : result += "]";
9092 0 : return result;
9093 0 : }
9094 : #endif
9095 :
9096 :
9097 : /********************************************************************
9098 : standard functions
9099 : ********************************************************************/
9100 0 : int alglib::sign(double x)
9101 : {
9102 0 : if( x>0 ) return 1;
9103 0 : if( x<0 ) return -1;
9104 0 : return 0;
9105 : }
9106 :
9107 0 : double alglib::randomreal()
9108 : {
9109 0 : int i1 = rand();
9110 0 : int i2 = rand();
9111 0 : double mx = (double)(RAND_MAX)+1.0;
9112 0 : volatile double tmp0 = i2/mx;
9113 0 : volatile double tmp1 = i1+tmp0;
9114 0 : return tmp1/mx;
9115 : }
9116 :
9117 0 : alglib::ae_int_t alglib::randominteger(alglib::ae_int_t maxv)
9118 : {
9119 0 : return ((alglib::ae_int_t)rand())%maxv;
9120 : }
9121 :
9122 0 : int alglib::round(double x)
9123 0 : { return int(floor(x+0.5)); }
9124 :
9125 0 : int alglib::trunc(double x)
9126 0 : { return int(x>0 ? floor(x) : ceil(x)); }
9127 :
9128 0 : int alglib::ifloor(double x)
9129 0 : { return int(floor(x)); }
9130 :
9131 0 : int alglib::iceil(double x)
9132 0 : { return int(ceil(x)); }
9133 :
9134 0 : double alglib::pi()
9135 0 : { return 3.14159265358979323846; }
9136 :
9137 0 : double alglib::sqr(double x)
9138 0 : { return x*x; }
9139 :
9140 0 : int alglib::maxint(int m1, int m2)
9141 : {
9142 0 : return m1>m2 ? m1 : m2;
9143 : }
9144 :
9145 0 : int alglib::minint(int m1, int m2)
9146 : {
9147 0 : return m1>m2 ? m2 : m1;
9148 : }
9149 :
9150 0 : double alglib::maxreal(double m1, double m2)
9151 : {
9152 0 : return m1>m2 ? m1 : m2;
9153 : }
9154 :
9155 0 : double alglib::minreal(double m1, double m2)
9156 : {
9157 0 : return m1>m2 ? m2 : m1;
9158 : }
9159 :
9160 0 : bool alglib::fp_eq(double v1, double v2)
9161 : {
9162 : // IEEE-strict floating point comparison
9163 0 : volatile double x = v1;
9164 0 : volatile double y = v2;
9165 0 : return x==y;
9166 : }
9167 :
9168 0 : bool alglib::fp_neq(double v1, double v2)
9169 : {
9170 : // IEEE-strict floating point comparison
9171 0 : return !fp_eq(v1,v2);
9172 : }
9173 :
9174 0 : bool alglib::fp_less(double v1, double v2)
9175 : {
9176 : // IEEE-strict floating point comparison
9177 0 : volatile double x = v1;
9178 0 : volatile double y = v2;
9179 0 : return x<y;
9180 : }
9181 :
9182 0 : bool alglib::fp_less_eq(double v1, double v2)
9183 : {
9184 : // IEEE-strict floating point comparison
9185 0 : volatile double x = v1;
9186 0 : volatile double y = v2;
9187 0 : return x<=y;
9188 : }
9189 :
9190 0 : bool alglib::fp_greater(double v1, double v2)
9191 : {
9192 : // IEEE-strict floating point comparison
9193 0 : volatile double x = v1;
9194 0 : volatile double y = v2;
9195 0 : return x>y;
9196 : }
9197 :
9198 0 : bool alglib::fp_greater_eq(double v1, double v2)
9199 : {
9200 : // IEEE-strict floating point comparison
9201 0 : volatile double x = v1;
9202 0 : volatile double y = v2;
9203 0 : return x>=y;
9204 : }
9205 :
9206 0 : bool alglib::fp_isnan(double x)
9207 : {
9208 0 : return alglib_impl::ae_isnan_stateless(x,endianness);
9209 : }
9210 :
9211 0 : bool alglib::fp_isposinf(double x)
9212 : {
9213 0 : return alglib_impl::ae_isposinf_stateless(x,endianness);
9214 : }
9215 :
9216 0 : bool alglib::fp_isneginf(double x)
9217 : {
9218 0 : return alglib_impl::ae_isneginf_stateless(x,endianness);
9219 : }
9220 :
9221 0 : bool alglib::fp_isinf(double x)
9222 : {
9223 0 : return alglib_impl::ae_isinf_stateless(x,endianness);
9224 : }
9225 :
9226 0 : bool alglib::fp_isfinite(double x)
9227 : {
9228 0 : return alglib_impl::ae_isfinite_stateless(x,endianness);
9229 : }
9230 :
9231 : /********************************************************************
9232 : CSV functions
9233 : ********************************************************************/
9234 : #if !defined(AE_NO_EXCEPTIONS)
9235 0 : void alglib::read_csv(const char *filename, char separator, int flags, alglib::real_2d_array &out)
9236 : {
9237 : int flag;
9238 :
9239 : //
9240 : // Parameters
9241 : //
9242 0 : bool skip_first_row = (flags&CSV_SKIP_HEADERS)!=0;
9243 :
9244 : //
9245 : // Prepare empty output array
9246 : //
9247 0 : out.setlength(0,0);
9248 :
9249 : //
9250 : // Open file, determine size, read contents
9251 : //
9252 0 : FILE *f_in = fopen(filename, "rb");
9253 0 : if( f_in==NULL )
9254 0 : _ALGLIB_CPP_EXCEPTION("read_csv: unable to open input file");
9255 0 : flag = fseek(f_in, 0, SEEK_END);
9256 0 : AE_CRITICAL_ASSERT(flag==0);
9257 0 : long int _filesize = ftell(f_in);
9258 0 : AE_CRITICAL_ASSERT(_filesize>=0);
9259 0 : if( _filesize==0 )
9260 : {
9261 : // empty file, return empty array, success
9262 0 : fclose(f_in);
9263 0 : return;
9264 : }
9265 0 : size_t filesize = _filesize;
9266 0 : std::vector<char> v_buf;
9267 0 : v_buf.resize(filesize+2, 0);
9268 0 : char *p_buf = &v_buf[0];
9269 0 : flag = fseek(f_in, 0, SEEK_SET);
9270 0 : AE_CRITICAL_ASSERT(flag==0);
9271 0 : size_t bytes_read = fread ((void*)p_buf, 1, filesize, f_in);
9272 0 : AE_CRITICAL_ASSERT(bytes_read==filesize);
9273 0 : fclose(f_in);
9274 :
9275 : //
9276 : // Normalize file contents:
9277 : // * replace 0x0 by spaces
9278 : // * remove trailing spaces and newlines
9279 : // * append trailing '\n' and '\0' characters
9280 : // Return if file contains only spaces/newlines.
9281 : //
9282 0 : for(size_t i=0; i<filesize; i++)
9283 0 : if( p_buf[i]==0 )
9284 0 : p_buf[i] = ' ';
9285 0 : for(; filesize>0; )
9286 : {
9287 0 : char c = p_buf[filesize-1];
9288 0 : if( c==' ' || c=='\t' || c=='\n' || c=='\r' )
9289 : {
9290 0 : filesize--;
9291 0 : continue;
9292 : }
9293 0 : break;
9294 : }
9295 0 : if( filesize==0 )
9296 0 : return;
9297 0 : p_buf[filesize+0] = '\n';
9298 0 : p_buf[filesize+1] = '\0';
9299 0 : filesize+=2;
9300 :
9301 : //
9302 : // Scan dataset.
9303 : //
9304 0 : size_t rows_count = 0, cols_count = 0, max_length = 0;
9305 0 : std::vector<size_t> offsets, lengths;
9306 0 : for(size_t row_start=0; p_buf[row_start]!=0x0; )
9307 : {
9308 : // determine row length
9309 : size_t row_length;
9310 0 : for(row_length=0; p_buf[row_start+row_length]!='\n'; row_length++);
9311 :
9312 : // determine cols count, perform integrity check
9313 0 : size_t cur_cols_cnt=1;
9314 0 : for(size_t idx=0; idx<row_length; idx++)
9315 0 : if( p_buf[row_start+idx]==separator )
9316 0 : cur_cols_cnt++;
9317 0 : if( cols_count>0 && cols_count!=cur_cols_cnt )
9318 0 : _ALGLIB_CPP_EXCEPTION("read_csv: non-rectangular contents, rows have different sizes");
9319 0 : cols_count = cur_cols_cnt;
9320 :
9321 : // store offsets and lengths of the fields
9322 0 : size_t cur_offs = 0;
9323 0 : for(size_t idx=0; idx<row_length+1; idx++)
9324 0 : if( p_buf[row_start+idx]==separator || p_buf[row_start+idx]=='\n' )
9325 : {
9326 0 : offsets.push_back(row_start+cur_offs);
9327 0 : lengths.push_back(idx-cur_offs);
9328 0 : max_length = idx-cur_offs>max_length ? idx-cur_offs : max_length;
9329 0 : cur_offs = idx+1;
9330 : }
9331 :
9332 : // advance row start
9333 0 : rows_count++;
9334 0 : row_start = row_start+row_length+1;
9335 : }
9336 0 : AE_CRITICAL_ASSERT(rows_count>=1);
9337 0 : AE_CRITICAL_ASSERT(cols_count>=1);
9338 0 : AE_CRITICAL_ASSERT(cols_count*rows_count==offsets.size());
9339 0 : AE_CRITICAL_ASSERT(cols_count*rows_count==lengths.size());
9340 0 : if( rows_count==1 && skip_first_row ) // empty output, return
9341 0 : return;
9342 :
9343 : //
9344 : // Convert
9345 : //
9346 0 : size_t row0 = skip_first_row ? 1 : 0;
9347 0 : size_t row1 = rows_count;
9348 0 : lconv *loc = localeconv();
9349 0 : out.setlength(row1-row0, cols_count);
9350 0 : for(size_t ridx=row0; ridx<row1; ridx++)
9351 0 : for(size_t cidx=0; cidx<cols_count; cidx++)
9352 : {
9353 0 : char *p_field = p_buf+offsets[ridx*cols_count+cidx];
9354 0 : size_t field_len = lengths[ridx*cols_count+cidx];
9355 0 : for(size_t idx=0; idx<field_len; idx++)
9356 0 : if( p_field[idx]=='.' || p_field[idx]==',' )
9357 0 : p_field[idx] = *loc->decimal_point;
9358 0 : out[ridx-row0][cidx] = atof(p_field);
9359 : }
9360 0 : }
9361 : #endif
9362 :
9363 :
9364 :
9365 : /********************************************************************
9366 : Trace functions
9367 : ********************************************************************/
9368 0 : void alglib::trace_file(std::string tags, std::string filename)
9369 : {
9370 0 : alglib_impl::ae_trace_file(tags.c_str(), filename.c_str());
9371 0 : }
9372 :
9373 0 : void alglib::trace_disable()
9374 : {
9375 0 : alglib_impl::ae_trace_disable();
9376 0 : }
9377 :
9378 :
9379 :
9380 : /////////////////////////////////////////////////////////////////////////
9381 : //
9382 : // THIS SECTIONS CONTAINS OPTIMIZED LINEAR ALGEBRA CODE
9383 : // IT IS SHARED BETWEEN C++ AND PURE C LIBRARIES
9384 : //
9385 : /////////////////////////////////////////////////////////////////////////
9386 : namespace alglib_impl
9387 : {
// Alignment boundary, in bytes, which the kernels below expect from
// their aligned operands.
9388 : #define alglib_simd_alignment 16
9389 :
// Block size of the real kernels (row stride of A, in doubles),
// together with its half and its double.
9390 : #define alglib_r_block 32
9391 : #define alglib_half_r_block 16
9392 : #define alglib_twice_r_block 64
9393 :
// Block size of the complex kernels (row stride of A, in pairs of
// doubles), together with its half and its double.
9394 : #define alglib_c_block 16
9395 : #define alglib_half_c_block 8
9396 : #define alglib_twice_c_block 32
9397 :
9398 :
9399 : /********************************************************************
9400 : This subroutine calculates fast 32x32 real matrix-vector product:
9401 :
9402 : y := beta*y + alpha*A*x
9403 :
9404 : using either generic C code or native optimizations (if available)
9405 :
9406 : IMPORTANT:
9407 : * A must be stored in row-major order,
9408 : stride is alglib_r_block,
9409 : aligned on alglib_simd_alignment boundary
9410 : * X must be aligned on alglib_simd_alignment boundary
9411 : * Y may be non-aligned
9412 : ********************************************************************/
9413 0 : void _ialglib_mv_32(const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
9414 : {
9415 : ae_int_t i, k;
9416 : const double *pa0, *pa1, *pb;
9417 :
9418 0 : pa0 = a;
9419 0 : pa1 = a+alglib_r_block;
9420 0 : pb = x;
9421 0 : for(i=0; i<16; i++)
9422 : {
9423 0 : double v0 = 0, v1 = 0;
9424 0 : for(k=0; k<4; k++)
9425 : {
9426 0 : v0 += pa0[0]*pb[0];
9427 0 : v1 += pa1[0]*pb[0];
9428 0 : v0 += pa0[1]*pb[1];
9429 0 : v1 += pa1[1]*pb[1];
9430 0 : v0 += pa0[2]*pb[2];
9431 0 : v1 += pa1[2]*pb[2];
9432 0 : v0 += pa0[3]*pb[3];
9433 0 : v1 += pa1[3]*pb[3];
9434 0 : v0 += pa0[4]*pb[4];
9435 0 : v1 += pa1[4]*pb[4];
9436 0 : v0 += pa0[5]*pb[5];
9437 0 : v1 += pa1[5]*pb[5];
9438 0 : v0 += pa0[6]*pb[6];
9439 0 : v1 += pa1[6]*pb[6];
9440 0 : v0 += pa0[7]*pb[7];
9441 0 : v1 += pa1[7]*pb[7];
9442 0 : pa0 += 8;
9443 0 : pa1 += 8;
9444 0 : pb += 8;
9445 : }
9446 0 : y[0] = beta*y[0]+alpha*v0;
9447 0 : y[stride] = beta*y[stride]+alpha*v1;
9448 :
9449 : /*
9450 : * now we've processed rows I and I+1,
9451 : * pa0 and pa1 are pointing to rows I+1 and I+2.
9452 : * move to I+2 and I+3.
9453 : */
9454 0 : pa0 += alglib_r_block;
9455 0 : pa1 += alglib_r_block;
9456 0 : pb = x;
9457 0 : y+=2*stride;
9458 : }
9459 0 : }
9460 :
9461 :
9462 : /*************************************************************************
9463 : This function calculates MxN real matrix-vector product:
9464 :
9465 : y := beta*y + alpha*A*x
9466 :
9467 : using generic C code. It calls _ialglib_mv_32 if both M=32 and N=32.
9468 :
9469 : If beta is zero, we do not use previous values of y (they are overwritten
9470 : by alpha*A*x without ever being read). If alpha is zero, no matrix-vector
9471 : product is calculated (only beta is updated); however, this update is not
9472 : efficient and this function should NOT be used for multiplication of
9473 : vector and scalar.
9474 :
9475 : IMPORTANT:
9476 : * 0<=M<=alglib_r_block, 0<=N<=alglib_r_block
9477 : * A must be stored in row-major order with stride equal to alglib_r_block
9478 : *************************************************************************/
9479 0 : void _ialglib_rmv(ae_int_t m, ae_int_t n, const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
9480 : {
9481 : /*
9482 : * Handle special cases:
9483 : * - alpha is zero or n is zero
9484 : * - m is zero
9485 : */
9486 0 : if( m==0 )
9487 0 : return;
9488 0 : if( alpha==0.0 || n==0 )
9489 : {
9490 : ae_int_t i;
9491 0 : if( beta==0.0 )
9492 : {
9493 0 : for(i=0; i<m; i++)
9494 : {
9495 0 : *y = 0.0;
9496 0 : y += stride;
9497 : }
9498 : }
9499 : else
9500 : {
9501 0 : for(i=0; i<m; i++)
9502 : {
9503 0 : *y *= beta;
9504 0 : y += stride;
9505 : }
9506 : }
9507 0 : return;
9508 : }
9509 :
9510 : /*
9511 : * Handle general case: nonzero alpha, n and m
9512 : *
9513 : */
9514 0 : if( m==32 && n==32 )
9515 : {
9516 : /*
9517 : * 32x32, may be we have something better than general implementation
9518 : */
9519 0 : _ialglib_mv_32(a, x, y, stride, alpha, beta);
9520 : }
9521 : else
9522 : {
9523 : ae_int_t i, k, m2, n8, n2, ntrail2;
9524 : const double *pa0, *pa1, *pb;
9525 :
9526 : /*
9527 : * First M/2 rows of A are processed in pairs.
9528 : * optimized code is used.
9529 : */
9530 0 : m2 = m/2;
9531 0 : n8 = n/8;
9532 0 : ntrail2 = (n-8*n8)/2;
9533 0 : for(i=0; i<m2; i++)
9534 : {
9535 0 : double v0 = 0, v1 = 0;
9536 :
9537 : /*
9538 : * 'a' points to the part of the matrix which
9539 : * is not processed yet
9540 : */
9541 0 : pb = x;
9542 0 : pa0 = a;
9543 0 : pa1 = a+alglib_r_block;
9544 0 : a += alglib_twice_r_block;
9545 :
9546 : /*
9547 : * 8 elements per iteration
9548 : */
9549 0 : for(k=0; k<n8; k++)
9550 : {
9551 0 : v0 += pa0[0]*pb[0];
9552 0 : v1 += pa1[0]*pb[0];
9553 0 : v0 += pa0[1]*pb[1];
9554 0 : v1 += pa1[1]*pb[1];
9555 0 : v0 += pa0[2]*pb[2];
9556 0 : v1 += pa1[2]*pb[2];
9557 0 : v0 += pa0[3]*pb[3];
9558 0 : v1 += pa1[3]*pb[3];
9559 0 : v0 += pa0[4]*pb[4];
9560 0 : v1 += pa1[4]*pb[4];
9561 0 : v0 += pa0[5]*pb[5];
9562 0 : v1 += pa1[5]*pb[5];
9563 0 : v0 += pa0[6]*pb[6];
9564 0 : v1 += pa1[6]*pb[6];
9565 0 : v0 += pa0[7]*pb[7];
9566 0 : v1 += pa1[7]*pb[7];
9567 0 : pa0 += 8;
9568 0 : pa1 += 8;
9569 0 : pb += 8;
9570 : }
9571 :
9572 : /*
9573 : * 2 elements per iteration
9574 : */
9575 0 : for(k=0; k<ntrail2; k++)
9576 : {
9577 0 : v0 += pa0[0]*pb[0];
9578 0 : v1 += pa1[0]*pb[0];
9579 0 : v0 += pa0[1]*pb[1];
9580 0 : v1 += pa1[1]*pb[1];
9581 0 : pa0 += 2;
9582 0 : pa1 += 2;
9583 0 : pb += 2;
9584 : }
9585 :
9586 : /*
9587 : * last element, if needed
9588 : */
9589 0 : if( n%2!=0 )
9590 : {
9591 0 : v0 += pa0[0]*pb[0];
9592 0 : v1 += pa1[0]*pb[0];
9593 : }
9594 :
9595 : /*
9596 : * final update
9597 : */
9598 0 : if( beta!=0 )
9599 : {
9600 0 : y[0] = beta*y[0]+alpha*v0;
9601 0 : y[stride] = beta*y[stride]+alpha*v1;
9602 : }
9603 : else
9604 : {
9605 0 : y[0] = alpha*v0;
9606 0 : y[stride] = alpha*v1;
9607 : }
9608 :
9609 : /*
9610 : * move to the next pair of elements
9611 : */
9612 0 : y+=2*stride;
9613 : }
9614 :
9615 :
9616 : /*
9617 : * Last (odd) row is processed with less optimized code.
9618 : */
9619 0 : if( m%2!=0 )
9620 : {
9621 0 : double v0 = 0;
9622 :
9623 : /*
9624 : * 'a' points to the part of the matrix which
9625 : * is not processed yet
9626 : */
9627 0 : pb = x;
9628 0 : pa0 = a;
9629 :
9630 : /*
9631 : * 2 elements per iteration
9632 : */
9633 0 : n2 = n/2;
9634 0 : for(k=0; k<n2; k++)
9635 : {
9636 0 : v0 += pa0[0]*pb[0]+pa0[1]*pb[1];
9637 0 : pa0 += 2;
9638 0 : pb += 2;
9639 : }
9640 :
9641 : /*
9642 : * last element, if needed
9643 : */
9644 0 : if( n%2!=0 )
9645 0 : v0 += pa0[0]*pb[0];
9646 :
9647 : /*
9648 : * final update
9649 : */
9650 0 : if( beta!=0 )
9651 0 : y[0] = beta*y[0]+alpha*v0;
9652 : else
9653 0 : y[0] = alpha*v0;
9654 : }
9655 : }
9656 : }
9657 :
9658 :
9659 : /*************************************************************************
9660 : This function calculates MxN real matrix-vector product:
9661 :
9662 : y := beta*y + alpha*A*x
9663 :
9664 : using SSE2 intrinsics (unlike the generic version, it does not call _ialglib_mv_32).
9665 :
9666 : If beta is zero, we do not use previous values of y (they are overwritten
9667 : by alpha*A*x without ever being read). If alpha is zero, no matrix-vector
9668 : product is calculated (only beta is updated); however, this update is not
9669 : efficient and this function should NOT be used for multiplication of
9670 : vector and scalar.
9671 :
9672 : IMPORTANT:
9673 : * 0<=M<=alglib_r_block, 0<=N<=alglib_r_block
9674 : * A must be stored in row-major order with stride equal to alglib_r_block
9675 : * y may be non-aligned
9676 : * both A and x must have same offset with respect to 16-byte boundary:
9677 : either both are aligned, or both are aligned with offset 8. Function
9678 : will crash your system if you try to call it with misaligned or
9679 : incorrectly aligned data.
9680 :
9681 : This function supports SSE2; it can be used when:
9682 : 1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
9683 : 2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
9684 :
9685 : If (1) does not hold, this function is not defined. If (2) does not hold,
9686 : a call to this function will probably crash your system.
9687 :
9688 : If you want to know whether it is safe to call it, you should check
9689 : results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
9690 : and will do its work.
9691 : *************************************************************************/
9692 : #if defined(AE_HAS_SSE2_INTRINSICS)
// SSE2 implementation of y := beta*y + alpha*A*x for a real MxN block
// (row stride of A is alglib_r_block). Rows are processed three at a
// time with packed (2 x double) accumulators; remaining M%3 rows are
// processed by scalar code.
9693 : void _ialglib_rmv_sse2(ae_int_t m, ae_int_t n, const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
9694 : {
9695 : ae_int_t i, k, n2;
9696 : ae_int_t mb3, mtail, nhead, nb8, nb2, ntail;
9697 : const double *pa0, *pa1, *pa2, *pb;
9698 : __m128d v0, v1, v2, va0, va1, va2, vx, vtmp;
9699 :
9700 : /*
9701 : * Handle special cases:
9702 : * - alpha is zero or n is zero
9703 : * - m is zero
9704 : */
9705 : if( m==0 )
9706 : return;
9707 : if( alpha==0.0 || n==0 )
9708 : {
// no product to calculate: y is zeroed (beta=0) or scaled by beta
9709 : if( beta==0.0 )
9710 : {
9711 : for(i=0; i<m; i++)
9712 : {
9713 : *y = 0.0;
9714 : y += stride;
9715 : }
9716 : }
9717 : else
9718 : {
9719 : for(i=0; i<m; i++)
9720 : {
9721 : *y *= beta;
9722 : y += stride;
9723 : }
9724 : }
9725 : return;
9726 : }
9727 :
9728 : /*
9729 : * Handle general case: nonzero alpha, n and m
9730 : *
9731 : * We divide problem as follows...
9732 : *
9733 : * Rows M are divided into:
9734 : * - mb3 blocks, each 3xN
9735 : * - mtail blocks, each 1xN
9736 : *
9737 : * Within a row, elements are divided into:
9738 : * - nhead 1x1 blocks (used to align the rest, either 0 or 1)
9739 : * - nb8 1x8 blocks, aligned to 16-byte boundary
9740 : * - nb2 1x2 blocks, aligned to 16-byte boundary
9741 : * - ntail 1x1 blocks, aligned too (although we don't rely on it)
9742 : *
9743 : */
9744 : n2 = n/2;
9745 : mb3 = m/3;
9746 : mtail = m%3;
// nhead is derived from alignment of 'a' only; x is expected to have
// the same offset with respect to the 16-byte boundary, because the
// aligned loads below are applied to both pa* and pb
9747 : nhead = ae_misalignment(a,alglib_simd_alignment)==0 ? 0 : 1;
9748 : nb8 = (n-nhead)/8;
9749 : nb2 = (n-nhead-8*nb8)/2;
9750 : ntail = n-nhead-8*nb8-2*nb2;
9751 : for(i=0; i<mb3; i++)
9752 : {
9753 : double row0, row1, row2;
9754 : row0 = 0;
9755 : row1 = 0;
9756 : row2 = 0;
9757 : pb = x;
9758 : pa0 = a;
9759 : pa1 = a+alglib_r_block;
9760 : pa2 = a+alglib_twice_r_block;
9761 : a += 3*alglib_r_block;
9762 : if( nhead==1 )
9763 : {
// leading element: its product initializes the lower lane of each
// accumulator (_mm_load_sd zeroes the upper lane)
9764 : vx = _mm_load_sd(pb);
9765 : v0 = _mm_load_sd(pa0);
9766 : v1 = _mm_load_sd(pa1);
9767 : v2 = _mm_load_sd(pa2);
9768 :
9769 : v0 = _mm_mul_sd(v0,vx);
9770 : v1 = _mm_mul_sd(v1,vx);
9771 : v2 = _mm_mul_sd(v2,vx);
9772 :
9773 : pa0++;
9774 : pa1++;
9775 : pa2++;
9776 : pb++;
9777 : }
9778 : else
9779 : {
9780 : v0 = _mm_setzero_pd();
9781 : v1 = _mm_setzero_pd();
9782 : v2 = _mm_setzero_pd();
9783 : }
9784 : for(k=0; k<nb8; k++)
9785 : {
9786 : /*
9787 : * this code is a shuffle of simultaneous dot product.
9788 : * see below for commented unshuffled original version.
9789 : */
9790 : vx = _mm_load_pd(pb);
9791 : va0 = _mm_load_pd(pa0);
9792 : va1 = _mm_load_pd(pa1);
9793 : va0 = _mm_mul_pd(va0,vx);
9794 : va2 = _mm_load_pd(pa2);
9795 : v0 = _mm_add_pd(va0,v0);
9796 : va1 = _mm_mul_pd(va1,vx);
9797 : va0 = _mm_load_pd(pa0+2);
9798 : v1 = _mm_add_pd(va1,v1);
9799 : va2 = _mm_mul_pd(va2,vx);
9800 : va1 = _mm_load_pd(pa1+2);
9801 : v2 = _mm_add_pd(va2,v2);
9802 : vx = _mm_load_pd(pb+2);
9803 : va0 = _mm_mul_pd(va0,vx);
9804 : va2 = _mm_load_pd(pa2+2);
9805 : v0 = _mm_add_pd(va0,v0);
9806 : va1 = _mm_mul_pd(va1,vx);
9807 : va0 = _mm_load_pd(pa0+4);
9808 : v1 = _mm_add_pd(va1,v1);
9809 : va2 = _mm_mul_pd(va2,vx);
9810 : va1 = _mm_load_pd(pa1+4);
9811 : v2 = _mm_add_pd(va2,v2);
9812 : vx = _mm_load_pd(pb+4);
9813 : va0 = _mm_mul_pd(va0,vx);
9814 : va2 = _mm_load_pd(pa2+4);
9815 : v0 = _mm_add_pd(va0,v0);
9816 : va1 = _mm_mul_pd(va1,vx);
9817 : va0 = _mm_load_pd(pa0+6);
9818 : v1 = _mm_add_pd(va1,v1);
9819 : va2 = _mm_mul_pd(va2,vx);
9820 : va1 = _mm_load_pd(pa1+6);
9821 : v2 = _mm_add_pd(va2,v2);
9822 : vx = _mm_load_pd(pb+6);
9823 : va0 = _mm_mul_pd(va0,vx);
9824 : v0 = _mm_add_pd(va0,v0);
9825 : va2 = _mm_load_pd(pa2+6);
9826 : va1 = _mm_mul_pd(va1,vx);
9827 : v1 = _mm_add_pd(va1,v1);
9828 : va2 = _mm_mul_pd(va2,vx);
9829 : v2 = _mm_add_pd(va2,v2);
9830 :
9831 : pa0 += 8;
9832 : pa1 += 8;
9833 : pa2 += 8;
9834 : pb += 8;
9835 :
9836 : /*
9837 : this is unshuffled version of code above
9838 :
9839 : vx = _mm_load_pd(pb);
9840 : va0 = _mm_load_pd(pa0);
9841 : va1 = _mm_load_pd(pa1);
9842 : va2 = _mm_load_pd(pa2);
9843 :
9844 : va0 = _mm_mul_pd(va0,vx);
9845 : va1 = _mm_mul_pd(va1,vx);
9846 : va2 = _mm_mul_pd(va2,vx);
9847 :
9848 : v0 = _mm_add_pd(va0,v0);
9849 : v1 = _mm_add_pd(va1,v1);
9850 : v2 = _mm_add_pd(va2,v2);
9851 :
9852 : vx = _mm_load_pd(pb+2);
9853 : va0 = _mm_load_pd(pa0+2);
9854 : va1 = _mm_load_pd(pa1+2);
9855 : va2 = _mm_load_pd(pa2+2);
9856 :
9857 : va0 = _mm_mul_pd(va0,vx);
9858 : va1 = _mm_mul_pd(va1,vx);
9859 : va2 = _mm_mul_pd(va2,vx);
9860 :
9861 : v0 = _mm_add_pd(va0,v0);
9862 : v1 = _mm_add_pd(va1,v1);
9863 : v2 = _mm_add_pd(va2,v2);
9864 :
9865 : vx = _mm_load_pd(pb+4);
9866 : va0 = _mm_load_pd(pa0+4);
9867 : va1 = _mm_load_pd(pa1+4);
9868 : va2 = _mm_load_pd(pa2+4);
9869 :
9870 : va0 = _mm_mul_pd(va0,vx);
9871 : va1 = _mm_mul_pd(va1,vx);
9872 : va2 = _mm_mul_pd(va2,vx);
9873 :
9874 : v0 = _mm_add_pd(va0,v0);
9875 : v1 = _mm_add_pd(va1,v1);
9876 : v2 = _mm_add_pd(va2,v2);
9877 :
9878 : vx = _mm_load_pd(pb+6);
9879 : va0 = _mm_load_pd(pa0+6);
9880 : va1 = _mm_load_pd(pa1+6);
9881 : va2 = _mm_load_pd(pa2+6);
9882 :
9883 : va0 = _mm_mul_pd(va0,vx);
9884 : va1 = _mm_mul_pd(va1,vx);
9885 : va2 = _mm_mul_pd(va2,vx);
9886 :
9887 : v0 = _mm_add_pd(va0,v0);
9888 : v1 = _mm_add_pd(va1,v1);
9889 : v2 = _mm_add_pd(va2,v2);
9890 : */
9891 : }
9892 : for(k=0; k<nb2; k++)
9893 : {
9894 : vx = _mm_load_pd(pb);
9895 : va0 = _mm_load_pd(pa0);
9896 : va1 = _mm_load_pd(pa1);
9897 : va2 = _mm_load_pd(pa2);
9898 :
9899 : va0 = _mm_mul_pd(va0,vx);
9900 : v0 = _mm_add_pd(va0,v0);
9901 : va1 = _mm_mul_pd(va1,vx);
9902 : v1 = _mm_add_pd(va1,v1);
9903 : va2 = _mm_mul_pd(va2,vx);
9904 : v2 = _mm_add_pd(va2,v2);
9905 :
9906 : pa0 += 2;
9907 : pa1 += 2;
9908 : pa2 += 2;
9909 : pb += 2;
9910 : }
// pointers are not advanced inside this loop; that is sufficient
// because ntail = n-nhead-8*nb8-2*nb2 is either 0 or 1
9911 : for(k=0; k<ntail; k++)
9912 : {
9913 : vx = _mm_load1_pd(pb);
9914 : va0 = _mm_load1_pd(pa0);
9915 : va1 = _mm_load1_pd(pa1);
9916 : va2 = _mm_load1_pd(pa2);
9917 :
9918 : va0 = _mm_mul_sd(va0,vx);
9919 : v0 = _mm_add_sd(v0,va0);
9920 : va1 = _mm_mul_sd(va1,vx);
9921 : v1 = _mm_add_sd(v1,va1);
9922 : va2 = _mm_mul_sd(va2,vx);
9923 : v2 = _mm_add_sd(v2,va2);
9924 : }
// horizontal reduction: vtmp = (v0.lo+v0.hi, v1.lo+v1.hi) gives
// row0/row1, lower lane of swapped v2 plus v2 gives row2
9925 : vtmp = _mm_add_pd(_mm_unpacklo_pd(v0,v1),_mm_unpackhi_pd(v0,v1));
9926 : _mm_storel_pd(&row0, vtmp);
9927 : _mm_storeh_pd(&row1, vtmp);
9928 : v2 = _mm_add_sd(_mm_shuffle_pd(v2,v2,1),v2);
9929 : _mm_storel_pd(&row2, v2);
// y is read only when beta is nonzero
9930 : if( beta!=0 )
9931 : {
9932 : y[0] = beta*y[0]+alpha*row0;
9933 : y[stride] = beta*y[stride]+alpha*row1;
9934 : y[2*stride] = beta*y[2*stride]+alpha*row2;
9935 : }
9936 : else
9937 : {
9938 : y[0] = alpha*row0;
9939 : y[stride] = alpha*row1;
9940 : y[2*stride] = alpha*row2;
9941 : }
9942 : y+=3*stride;
9943 : }
// remaining M%3 rows: scalar code, two elements per iteration plus
// the last element for odd N
9944 : for(i=0; i<mtail; i++)
9945 : {
9946 : double row0;
9947 : row0 = 0;
9948 : pb = x;
9949 : pa0 = a;
9950 : a += alglib_r_block;
9951 : for(k=0; k<n2; k++)
9952 : {
9953 : row0 += pb[0]*pa0[0]+pb[1]*pa0[1];
9954 : pa0 += 2;
9955 : pb += 2;
9956 : }
9957 : if( n%2 )
9958 : row0 += pb[0]*pa0[0];
9959 : if( beta!=0 )
9960 : y[0] = beta*y[0]+alpha*row0;
9961 : else
9962 : y[0] = alpha*row0;
9963 : y+=stride;
9964 : }
9965 : }
9966 : #endif
9967 :
9968 :
9969 : /*************************************************************************
9970 : This subroutine calculates fast MxN complex matrix-vector product:
9971 :
9972 : y := beta*y + alpha*A*x
9973 :
9974 : using generic C code, where A, x, y, alpha and beta are complex.
9975 :
9976 : If beta is zero, we do not use previous values of y (they are overwritten
9977 : by alpha*A*x without ever being read). However, when alpha is zero, we
9978 : still calculate A*x and multiply it by alpha (this distinction can be
9979 : important when A or x contain infinities/NANs).
9980 :
9981 : IMPORTANT:
9982 : * 0<=M<=alglib_c_block, 0<=N<=alglib_c_block
9983 : * A must be stored in row-major order, as sequence of double precision
9984 : pairs. Stride is alglib_c_block (it is measured in pairs of doubles, not
9985 : in doubles).
9986 : * Y may be referenced by cy (pointer to ae_complex) or
9987 : dy (pointer to array of double precision pair) depending on what type of
9988 : output you wish. Pass pointer to Y as one of these parameters,
9989 : AND SET OTHER PARAMETER TO NULL.
9990 : * both A and x must be aligned; y may be non-aligned.
9991 : *************************************************************************/
9992 0 : void _ialglib_cmv(ae_int_t m, ae_int_t n, const double *a, const double *x, ae_complex *cy, double *dy, ae_int_t stride, ae_complex alpha, ae_complex beta)
9993 : {
9994 : ae_int_t i, j;
9995 : const double *pa, *parow, *pb;
9996 :
9997 0 : parow = a;
9998 0 : for(i=0; i<m; i++)
9999 : {
10000 0 : double v0 = 0, v1 = 0;
10001 0 : pa = parow;
10002 0 : pb = x;
10003 0 : for(j=0; j<n; j++)
10004 : {
10005 0 : v0 += pa[0]*pb[0];
10006 0 : v1 += pa[0]*pb[1];
10007 0 : v0 -= pa[1]*pb[1];
10008 0 : v1 += pa[1]*pb[0];
10009 :
10010 0 : pa += 2;
10011 0 : pb += 2;
10012 : }
10013 0 : if( cy!=NULL )
10014 : {
10015 0 : double tx = (beta.x*cy->x-beta.y*cy->y)+(alpha.x*v0-alpha.y*v1);
10016 0 : double ty = (beta.x*cy->y+beta.y*cy->x)+(alpha.x*v1+alpha.y*v0);
10017 0 : cy->x = tx;
10018 0 : cy->y = ty;
10019 0 : cy+=stride;
10020 : }
10021 : else
10022 : {
10023 0 : double tx = (beta.x*dy[0]-beta.y*dy[1])+(alpha.x*v0-alpha.y*v1);
10024 0 : double ty = (beta.x*dy[1]+beta.y*dy[0])+(alpha.x*v1+alpha.y*v0);
10025 0 : dy[0] = tx;
10026 0 : dy[1] = ty;
10027 0 : dy += 2*stride;
10028 : }
10029 0 : parow += 2*alglib_c_block;
10030 : }
10031 0 : }
10032 :
10033 :
/*************************************************************************
SSE2 version of the fast MxN complex matrix-vector product:

    y := beta*y + alpha*A*x

where A, x, y, alpha and beta are complex.

Rows of A are processed in pairs: one 128-bit register holds real parts of
two partial dot products, another one holds their imaginary parts. When M
is odd, the last row is processed by scalar code.

If beta is exactly zero, previous values of y are not read (they are
overwritten by alpha*A*x). When alpha is zero, A*x is still calculated and
multiplied by alpha (this distinction can be important when A or x contain
infinities/NANs).

IMPORTANT:
* 0<=M<=alglib_c_block, 0<=N<=alglib_c_block
* A must be stored in row-major order, as sequence of double precision
  pairs. Row stride is alglib_c_block (it is measured in pairs of doubles,
  not in doubles).
* Y may be referenced by cy (pointer to ae_complex) or
  dy (pointer to array of double precision pairs). Pass pointer to Y as
  one of these parameters, AND SET OTHER PARAMETER TO NULL. A non-NULL cy
  is reinterpreted as pointer to array of double precision pairs.
* stride is a distance between elements of Y, measured in complex numbers.
* rows of A are read with aligned loads, so A must be aligned; y is
  accessed one double at a time and may be non-aligned.

This function supports SSE2; it can be used when:
1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)

If (1) is failed, this function will be undefined. If (2) is failed, call
to this function will probably crash your system.
*************************************************************************/
#if defined(AE_HAS_SSE2_INTRINSICS)
void _ialglib_cmv_sse2(ae_int_t m, ae_int_t n, const double *a, const double *x, ae_complex *cy, double *dy, ae_int_t stride, ae_complex alpha, ae_complex beta)
{
    const ae_int_t row_pairs = m/2;
    const bool beta_is_zero = beta.x==0.0 && beta.y==0.0;
    const double *row_ptr = a;
    double *out = cy!=NULL ? reinterpret_cast<double*>(cy) : dy;
    ae_int_t r, c;

    /*
     * real/imaginary parts of the scalars, broadcasted to both lanes
     */
    const __m128d beta_re  = _mm_load1_pd(&beta.x);
    const __m128d beta_im  = _mm_load1_pd(&beta.y);
    const __m128d alpha_re = _mm_load1_pd(&alpha.x);
    const __m128d alpha_im = _mm_load1_pd(&alpha.y);

    /*
     * two rows per iteration: lane 0 <=> row 2*r, lane 1 <=> row 2*r+1
     */
    for(r=0; r<row_pairs; r++)
    {
        const double *row0 = row_ptr;
        const double *row1 = row_ptr+2*alglib_c_block;
        const double *xv = x;
        __m128d dot_re = _mm_setzero_pd();
        __m128d dot_im = _mm_setzero_pd();
        __m128d res_re = _mm_setzero_pd();
        __m128d res_im = _mm_setzero_pd();

        for(c=0; c<n; c++)
        {
            const __m128d x_re = _mm_load1_pd(xv);
            const __m128d x_im = _mm_load1_pd(xv+1);
            const __m128d e0   = _mm_load_pd(row0);
            const __m128d e1   = _mm_load_pd(row1);
            const __m128d a_re = _mm_unpacklo_pd(e0,e1);
            const __m128d a_im = _mm_unpackhi_pd(e0,e1);

            /* re += a_re*x_re, then re -= a_im*x_im */
            dot_re = _mm_add_pd(dot_re,_mm_mul_pd(a_re,x_re));
            dot_re = _mm_sub_pd(dot_re,_mm_mul_pd(a_im,x_im));

            /* im += a_im*x_re, then im += a_re*x_im */
            dot_im = _mm_add_pd(dot_im,_mm_mul_pd(a_im,x_re));
            dot_im = _mm_add_pd(dot_im,_mm_mul_pd(a_re,x_im));

            row0 += 2;
            row1 += 2;
            xv += 2;
        }

        /*
         * beta*y term; y is loaded only when beta is non-zero
         */
        if( !beta_is_zero )
        {
            const __m128d y_re = _mm_loadh_pd(_mm_load_sd(out+0),out+2*stride+0);
            const __m128d y_im = _mm_loadh_pd(_mm_load_sd(out+1),out+2*stride+1);
            res_re = _mm_sub_pd(_mm_mul_pd(beta_re,y_re),_mm_mul_pd(beta_im,y_im));
            res_im = _mm_add_pd(_mm_mul_pd(beta_re,y_im),_mm_mul_pd(beta_im,y_re));
        }

        /*
         * add alpha*(A*x) and scatter two complex results
         */
        res_re = _mm_add_pd(res_re,_mm_sub_pd(_mm_mul_pd(alpha_re,dot_re),_mm_mul_pd(alpha_im,dot_im)));
        res_im = _mm_add_pd(res_im,_mm_add_pd(_mm_mul_pd(alpha_re,dot_im),_mm_mul_pd(alpha_im,dot_re)));
        _mm_storel_pd(out+0, res_re);
        _mm_storeh_pd(out+2*stride+0, res_re);
        _mm_storel_pd(out+1, res_im);
        _mm_storeh_pd(out+2*stride+1, res_im);

        out += 4*stride;
        row_ptr += 4*alglib_c_block;
    }

    /*
     * odd M: the last row is handled by scalar code
     */
    if( m%2 )
    {
        const double *row = row_ptr;
        const double *xv = x;
        double dot_re = 0, dot_im = 0;
        double res_re = 0.0, res_im = 0.0;

        for(c=0; c<n; c++)
        {
            dot_re += row[0]*xv[0];
            dot_im += row[0]*xv[1];
            dot_re -= row[1]*xv[1];
            dot_im += row[1]*xv[0];

            row += 2;
            xv += 2;
        }
        if( !beta_is_zero )
        {
            res_re = beta.x*out[0]-beta.y*out[1];
            res_im = beta.x*out[1]+beta.y*out[0];
        }
        res_re += alpha.x*dot_re-alpha.y*dot_im;
        res_im += alpha.x*dot_im+alpha.y*dot_re;
        out[0] = res_re;
        out[1] = res_im;
    }
}
#endif
10179 :
10180 : /********************************************************************
10181 : This subroutine sets vector to zero
10182 : ********************************************************************/
10183 0 : void _ialglib_vzero(ae_int_t n, double *p, ae_int_t stride)
10184 : {
10185 : ae_int_t i;
10186 0 : if( stride==1 )
10187 : {
10188 0 : for(i=0; i<n; i++,p++)
10189 0 : *p = 0.0;
10190 : }
10191 : else
10192 : {
10193 0 : for(i=0; i<n; i++,p+=stride)
10194 0 : *p = 0.0;
10195 : }
10196 0 : }
10197 :
10198 : /********************************************************************
10199 : This subroutine sets vector to zero
10200 : ********************************************************************/
10201 0 : void _ialglib_vzero_complex(ae_int_t n, ae_complex *p, ae_int_t stride)
10202 : {
10203 : ae_int_t i;
10204 0 : if( stride==1 )
10205 : {
10206 0 : for(i=0; i<n; i++,p++)
10207 : {
10208 0 : p->x = 0.0;
10209 0 : p->y = 0.0;
10210 : }
10211 : }
10212 : else
10213 : {
10214 0 : for(i=0; i<n; i++,p+=stride)
10215 : {
10216 0 : p->x = 0.0;
10217 0 : p->y = 0.0;
10218 : }
10219 : }
10220 0 : }
10221 :
10222 :
10223 : /********************************************************************
10224 : This subroutine copies unaligned real vector
10225 : ********************************************************************/
10226 0 : void _ialglib_vcopy(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb)
10227 : {
10228 : ae_int_t i, n2;
10229 0 : if( stridea==1 && strideb==1 )
10230 : {
10231 0 : n2 = n/2;
10232 0 : for(i=n2; i!=0; i--, a+=2, b+=2)
10233 : {
10234 0 : b[0] = a[0];
10235 0 : b[1] = a[1];
10236 : }
10237 0 : if( n%2!=0 )
10238 0 : b[0] = a[0];
10239 : }
10240 : else
10241 : {
10242 0 : for(i=0; i<n; i++,a+=stridea,b+=strideb)
10243 0 : *b = *a;
10244 : }
10245 0 : }
10246 :
10247 :
10248 : /********************************************************************
10249 : This subroutine copies unaligned complex vector
10250 : (passed as ae_complex*)
10251 :
10252 : 1. strideb is stride measured in complex numbers, not doubles
10253 : 2. conj may be "N" (no conj.) or "C" (conj.)
10254 : ********************************************************************/
10255 0 : void _ialglib_vcopy_complex(ae_int_t n, const ae_complex *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj)
10256 : {
10257 : ae_int_t i;
10258 :
10259 : /*
10260 : * more general case
10261 : */
10262 0 : if( conj[0]=='N' || conj[0]=='n' )
10263 : {
10264 0 : for(i=0; i<n; i++,a+=stridea,b+=2*strideb)
10265 : {
10266 0 : b[0] = a->x;
10267 0 : b[1] = a->y;
10268 : }
10269 : }
10270 : else
10271 : {
10272 0 : for(i=0; i<n; i++,a+=stridea,b+=2*strideb)
10273 : {
10274 0 : b[0] = a->x;
10275 0 : b[1] = -a->y;
10276 : }
10277 : }
10278 0 : }
10279 :
10280 :
10281 : /********************************************************************
10282 : This subroutine copies unaligned complex vector (passed as double*)
10283 :
10284 : 1. strideb is stride measured in complex numbers, not doubles
10285 : 2. conj may be "N" (no conj.) or "C" (conj.)
10286 : ********************************************************************/
10287 0 : void _ialglib_vcopy_dcomplex(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj)
10288 : {
10289 : ae_int_t i;
10290 :
10291 : /*
10292 : * more general case
10293 : */
10294 0 : if( conj[0]=='N' || conj[0]=='n' )
10295 : {
10296 0 : for(i=0; i<n; i++,a+=2*stridea,b+=2*strideb)
10297 : {
10298 0 : b[0] = a[0];
10299 0 : b[1] = a[1];
10300 : }
10301 : }
10302 : else
10303 : {
10304 0 : for(i=0; i<n; i++,a+=2*stridea,b+=2*strideb)
10305 : {
10306 0 : b[0] = a[0];
10307 0 : b[1] = -a[1];
10308 : }
10309 : }
10310 0 : }
10311 :
10312 :
10313 : /********************************************************************
10314 : This subroutine copies matrix from non-aligned non-contigous storage
10315 : to aligned contigous storage
10316 :
10317 : A:
10318 : * MxN
10319 : * non-aligned
10320 : * non-contigous
10321 : * may be transformed during copying (as prescribed by op)
10322 :
10323 : B:
10324 : * alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
10325 : * aligned
10326 : * stride is alglib_r_block
10327 :
10328 : Transformation types:
10329 : * 0 - no transform
10330 : * 1 - transposition
10331 : ********************************************************************/
10332 0 : void _ialglib_mcopyblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_int_t stride, double *b)
10333 : {
10334 : ae_int_t i, j, n2;
10335 : const double *psrc;
10336 : double *pdst;
10337 0 : if( op==0 )
10338 : {
10339 0 : n2 = n/2;
10340 0 : for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_r_block,psrc=a)
10341 : {
10342 0 : for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=2)
10343 : {
10344 0 : pdst[0] = psrc[0];
10345 0 : pdst[1] = psrc[1];
10346 : }
10347 0 : if( n%2!=0 )
10348 0 : pdst[0] = psrc[0];
10349 : }
10350 : }
10351 : else
10352 : {
10353 0 : n2 = n/2;
10354 0 : for(i=0,psrc=a; i<m; i++,a+=stride,b+=1,psrc=a)
10355 : {
10356 0 : for(j=0,pdst=b; j<n2; j++,pdst+=alglib_twice_r_block,psrc+=2)
10357 : {
10358 0 : pdst[0] = psrc[0];
10359 0 : pdst[alglib_r_block] = psrc[1];
10360 : }
10361 0 : if( n%2!=0 )
10362 0 : pdst[0] = psrc[0];
10363 : }
10364 : }
10365 0 : }
10366 :
10367 :
/********************************************************************
SSE2 version of the subroutine which copies matrix from non-aligned
non-contiguous storage to aligned contiguous storage

A (source):
* MxN
* non-aligned (it is read with unaligned loads)
* non-contiguous, row stride is given by stride parameter
* may be transformed during copying (as prescribed by op)

B (destination):
* alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
* aligned (it is written with aligned stores)
* stride is alglib_r_block

Transformation types:
* 0     - no transform; rows are copied in chunks of 8 doubles, with
          scalar code being used for the tail
* other - transposition (callers in this file pass 1); source rows
          are processed in pairs, 2x4 source tiles are interleaved
          and stored as four 2-element destination pieces; scalar
          code handles remaining columns and the last row of odd M

This function supports SSE2; it can be used when:
1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)

If (1) is failed, this function will be undefined. If (2) is failed,
call to this function will probably crash your system.
********************************************************************/
#if defined(AE_HAS_SSE2_INTRINSICS)
void _ialglib_mcopyblock_sse2(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_int_t stride, double *b)
{
    ae_int_t row, col;

    if( op==0 )
    {
        /*
         * straight copy, row by row
         */
        const ae_int_t nvec = 8*(n/8);
        for(row=0; row<m; row++)
        {
            for(col=0; col<nvec; col+=8)
            {
                _mm_store_pd(b+col+0, _mm_loadu_pd(a+col+0));
                _mm_store_pd(b+col+2, _mm_loadu_pd(a+col+2));
                _mm_store_pd(b+col+4, _mm_loadu_pd(a+col+4));
                _mm_store_pd(b+col+6, _mm_loadu_pd(a+col+6));
            }
            for(; col<n; col++)
                b[col] = a[col];
            a += stride;
            b += alglib_r_block;
        }
        return;
    }

    /*
     * transposition: two source rows become two adjacent
     * destination columns
     */
    const ae_int_t row_pairs = m/2;
    const ae_int_t nvec = 4*(n/4);
    const double *src_rows = a;
    double *dst_cols = b;
    ae_int_t pair;

    for(pair=0; pair<row_pairs; pair++)
    {
        const double *s0 = src_rows;
        const double *s1 = src_rows+stride;
        double *d = dst_cols;

        for(col=0; col<nvec; col+=4)
        {
            const __m128d r0_01 = _mm_loadu_pd(s0);
            const __m128d r1_01 = _mm_loadu_pd(s1);
            const __m128d r0_23 = _mm_loadu_pd(s0+2);
            const __m128d r1_23 = _mm_loadu_pd(s1+2);
            _mm_store_pd(d,                  _mm_unpacklo_pd(r0_01,r1_01));
            _mm_store_pd(d+alglib_r_block,   _mm_unpackhi_pd(r0_01,r1_01));
            _mm_store_pd(d+2*alglib_r_block, _mm_unpacklo_pd(r0_23,r1_23));
            _mm_store_pd(d+3*alglib_r_block, _mm_unpackhi_pd(r0_23,r1_23));
            s0 += 4;
            s1 += 4;
            d += 4*alglib_r_block;
        }
        for(; col<n; col++)
        {
            d[0] = *s0;
            d[1] = *s1;
            s0++;
            s1++;
            d += alglib_r_block;
        }
        src_rows += 2*stride;
        dst_cols += 2;
    }

    /*
     * odd M: the last source row is copied by scalar code
     */
    if( m%2 )
    {
        const ae_int_t npairs = n/2;
        const double *s = src_rows;
        double *d = dst_cols;
        ae_int_t k;

        for(k=0; k<npairs; k++)
        {
            d[0] = s[0];
            d[alglib_r_block] = s[1];
            d += alglib_twice_r_block;
            s += 2;
        }
        if( n%2!=0 )
            d[0] = s[0];
    }
}
#endif
10499 :
10500 :
10501 : /********************************************************************
10502 : This subroutine copies matrix from aligned contigous storage to non-
10503 : aligned non-contigous storage
10504 :
10505 : A:
10506 : * MxN
10507 : * aligned
10508 : * contigous
10509 : * stride is alglib_r_block
10510 : * may be transformed during copying (as prescribed by op)
10511 :
10512 : B:
10513 : * alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
10514 : * non-aligned, non-contigous
10515 :
10516 : Transformation types:
10517 : * 0 - no transform
10518 : * 1 - transposition
10519 : ********************************************************************/
10520 0 : void _ialglib_mcopyunblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, double *b, ae_int_t stride)
10521 : {
10522 : ae_int_t i, j, n2;
10523 : const double *psrc;
10524 : double *pdst;
10525 0 : if( op==0 )
10526 : {
10527 0 : n2 = n/2;
10528 0 : for(i=0,psrc=a; i<m; i++,a+=alglib_r_block,b+=stride,psrc=a)
10529 : {
10530 0 : for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=2)
10531 : {
10532 0 : pdst[0] = psrc[0];
10533 0 : pdst[1] = psrc[1];
10534 : }
10535 0 : if( n%2!=0 )
10536 0 : pdst[0] = psrc[0];
10537 : }
10538 : }
10539 : else
10540 : {
10541 0 : n2 = n/2;
10542 0 : for(i=0,psrc=a; i<m; i++,a++,b+=stride,psrc=a)
10543 : {
10544 0 : for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=alglib_twice_r_block)
10545 : {
10546 0 : pdst[0] = psrc[0];
10547 0 : pdst[1] = psrc[alglib_r_block];
10548 : }
10549 0 : if( n%2!=0 )
10550 0 : pdst[0] = psrc[0];
10551 : }
10552 : }
10553 0 : }
10554 :
10555 :
10556 : /********************************************************************
10557 : This subroutine copies matrix from non-aligned non-contigous storage
10558 : to aligned contigous storage
10559 :
10560 : A:
10561 : * MxN
10562 : * non-aligned
10563 : * non-contigous
10564 : * may be transformed during copying (as prescribed by op)
10565 : * pointer to ae_complex is passed
10566 :
10567 : B:
10568 : * 2*alglib_c_block*alglib_c_block doubles (only MxN/NxM submatrix is used)
10569 : * aligned
10570 : * stride is alglib_c_block
10571 : * pointer to double is passed
10572 :
10573 : Transformation types:
10574 : * 0 - no transform
10575 : * 1 - transposition
10576 : * 2 - conjugate transposition
10577 : * 3 - conjugate, but no transposition
10578 : ********************************************************************/
10579 0 : void _ialglib_mcopyblock_complex(ae_int_t m, ae_int_t n, const ae_complex *a, ae_int_t op, ae_int_t stride, double *b)
10580 : {
10581 : ae_int_t i, j;
10582 : const ae_complex *psrc;
10583 : double *pdst;
10584 0 : if( op==0 )
10585 : {
10586 0 : for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_twice_c_block,psrc=a)
10587 0 : for(j=0,pdst=b; j<n; j++,pdst+=2,psrc++)
10588 : {
10589 0 : pdst[0] = psrc->x;
10590 0 : pdst[1] = psrc->y;
10591 : }
10592 : }
10593 0 : if( op==1 )
10594 : {
10595 0 : for(i=0,psrc=a; i<m; i++,a+=stride,b+=2,psrc=a)
10596 0 : for(j=0,pdst=b; j<n; j++,pdst+=alglib_twice_c_block,psrc++)
10597 : {
10598 0 : pdst[0] = psrc->x;
10599 0 : pdst[1] = psrc->y;
10600 : }
10601 : }
10602 0 : if( op==2 )
10603 : {
10604 0 : for(i=0,psrc=a; i<m; i++,a+=stride,b+=2,psrc=a)
10605 0 : for(j=0,pdst=b; j<n; j++,pdst+=alglib_twice_c_block,psrc++)
10606 : {
10607 0 : pdst[0] = psrc->x;
10608 0 : pdst[1] = -psrc->y;
10609 : }
10610 : }
10611 0 : if( op==3 )
10612 : {
10613 0 : for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_twice_c_block,psrc=a)
10614 0 : for(j=0,pdst=b; j<n; j++,pdst+=2,psrc++)
10615 : {
10616 0 : pdst[0] = psrc->x;
10617 0 : pdst[1] = -psrc->y;
10618 : }
10619 : }
10620 0 : }
10621 :
10622 :
10623 : /********************************************************************
10624 : This subroutine copies matrix from aligned contigous storage to
10625 : non-aligned non-contigous storage
10626 :
10627 : A:
10628 : * 2*alglib_c_block*alglib_c_block doubles (only MxN submatrix is used)
10629 : * aligned
10630 : * stride is alglib_c_block
10631 : * pointer to double is passed
10632 : * may be transformed during copying (as prescribed by op)
10633 :
10634 : B:
10635 : * MxN
10636 : * non-aligned
10637 : * non-contigous
10638 : * pointer to ae_complex is passed
10639 :
10640 : Transformation types:
10641 : * 0 - no transform
10642 : * 1 - transposition
10643 : * 2 - conjugate transposition
10644 : * 3 - conjugate, but no transposition
10645 : ********************************************************************/
10646 0 : void _ialglib_mcopyunblock_complex(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_complex* b, ae_int_t stride)
10647 : {
10648 : ae_int_t i, j;
10649 : const double *psrc;
10650 : ae_complex *pdst;
10651 0 : if( op==0 )
10652 : {
10653 0 : for(i=0,psrc=a; i<m; i++,a+=alglib_twice_c_block,b+=stride,psrc=a)
10654 0 : for(j=0,pdst=b; j<n; j++,pdst++,psrc+=2)
10655 : {
10656 0 : pdst->x = psrc[0];
10657 0 : pdst->y = psrc[1];
10658 : }
10659 : }
10660 0 : if( op==1 )
10661 : {
10662 0 : for(i=0,psrc=a; i<m; i++,a+=2,b+=stride,psrc=a)
10663 0 : for(j=0,pdst=b; j<n; j++,pdst++,psrc+=alglib_twice_c_block)
10664 : {
10665 0 : pdst->x = psrc[0];
10666 0 : pdst->y = psrc[1];
10667 : }
10668 : }
10669 0 : if( op==2 )
10670 : {
10671 0 : for(i=0,psrc=a; i<m; i++,a+=2,b+=stride,psrc=a)
10672 0 : for(j=0,pdst=b; j<n; j++,pdst++,psrc+=alglib_twice_c_block)
10673 : {
10674 0 : pdst->x = psrc[0];
10675 0 : pdst->y = -psrc[1];
10676 : }
10677 : }
10678 0 : if( op==3 )
10679 : {
10680 0 : for(i=0,psrc=a; i<m; i++,a+=alglib_twice_c_block,b+=stride,psrc=a)
10681 0 : for(j=0,pdst=b; j<n; j++,pdst++,psrc+=2)
10682 : {
10683 0 : pdst->x = psrc[0];
10684 0 : pdst->y = -psrc[1];
10685 : }
10686 : }
10687 0 : }
10688 :
10689 :
10690 : /********************************************************************
10691 : Real GEMM kernel
10692 : ********************************************************************/
10693 0 : ae_bool _ialglib_rmatrixgemm(ae_int_t m,
10694 : ae_int_t n,
10695 : ae_int_t k,
10696 : double alpha,
10697 : double *_a,
10698 : ae_int_t _a_stride,
10699 : ae_int_t optypea,
10700 : double *_b,
10701 : ae_int_t _b_stride,
10702 : ae_int_t optypeb,
10703 : double beta,
10704 : double *_c,
10705 : ae_int_t _c_stride)
10706 : {
10707 : int i;
10708 : double *crow;
10709 : double _abuf[alglib_r_block+alglib_simd_alignment];
10710 : double _bbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
10711 0 : double * const abuf = (double * ) ae_align(_abuf,alglib_simd_alignment);
10712 0 : double * const b = (double * ) ae_align(_bbuf,alglib_simd_alignment);
10713 0 : void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
10714 0 : void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;
10715 :
10716 0 : if( m>alglib_r_block || n>alglib_r_block || k>alglib_r_block || m<=0 || n<=0 || k<=0 || alpha==0.0 )
10717 0 : return ae_false;
10718 :
10719 : /*
10720 : * Check for SSE2 support
10721 : */
10722 : #ifdef AE_HAS_SSE2_INTRINSICS
10723 : if( ae_cpuid() & CPU_SSE2 )
10724 : {
10725 : rmv = &_ialglib_rmv_sse2;
10726 : mcopyblock = &_ialglib_mcopyblock_sse2;
10727 : }
10728 : #endif
10729 :
10730 : /*
10731 : * copy b
10732 : */
10733 0 : if( optypeb==0 )
10734 0 : mcopyblock(k, n, _b, 1, _b_stride, b);
10735 : else
10736 0 : mcopyblock(n, k, _b, 0, _b_stride, b);
10737 :
10738 : /*
10739 : * multiply B by A (from the right, by rows)
10740 : * and store result in C
10741 : */
10742 0 : crow = _c;
10743 0 : if( optypea==0 )
10744 : {
10745 0 : const double *arow = _a;
10746 0 : for(i=0; i<m; i++)
10747 : {
10748 0 : _ialglib_vcopy(k, arow, 1, abuf, 1);
10749 0 : if( beta==0 )
10750 0 : _ialglib_vzero(n, crow, 1);
10751 0 : rmv(n, k, b, abuf, crow, 1, alpha, beta);
10752 0 : crow += _c_stride;
10753 0 : arow += _a_stride;
10754 : }
10755 : }
10756 : else
10757 : {
10758 0 : const double *acol = _a;
10759 0 : for(i=0; i<m; i++)
10760 : {
10761 0 : _ialglib_vcopy(k, acol, _a_stride, abuf, 1);
10762 0 : if( beta==0 )
10763 0 : _ialglib_vzero(n, crow, 1);
10764 0 : rmv(n, k, b, abuf, crow, 1, alpha, beta);
10765 0 : crow += _c_stride;
10766 0 : acol++;
10767 : }
10768 : }
10769 0 : return ae_true;
10770 : }
10771 :
10772 :
10773 : /********************************************************************
10774 : Complex GEMM kernel
10775 : ********************************************************************/
10776 0 : ae_bool _ialglib_cmatrixgemm(ae_int_t m,
10777 : ae_int_t n,
10778 : ae_int_t k,
10779 : ae_complex alpha,
10780 : ae_complex *_a,
10781 : ae_int_t _a_stride,
10782 : ae_int_t optypea,
10783 : ae_complex *_b,
10784 : ae_int_t _b_stride,
10785 : ae_int_t optypeb,
10786 : ae_complex beta,
10787 : ae_complex *_c,
10788 : ae_int_t _c_stride)
10789 : {
10790 : const ae_complex *arow;
10791 : ae_complex *crow;
10792 : ae_int_t i;
10793 : double _loc_abuf[2*alglib_c_block+alglib_simd_alignment];
10794 : double _loc_b[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
10795 0 : double * const abuf = (double *)ae_align(_loc_abuf,alglib_simd_alignment);
10796 0 : double * const b = (double *)ae_align(_loc_b, alglib_simd_alignment);
10797 : ae_int_t brows;
10798 : ae_int_t bcols;
10799 0 : void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;
10800 :
10801 0 : if( m>alglib_c_block || n>alglib_c_block || k>alglib_c_block )
10802 0 : return ae_false;
10803 :
10804 : /*
10805 : * Check for SSE2 support
10806 : */
10807 : #ifdef AE_HAS_SSE2_INTRINSICS
10808 : if( ae_cpuid() & CPU_SSE2 )
10809 : {
10810 : cmv = &_ialglib_cmv_sse2;
10811 : }
10812 : #endif
10813 :
10814 : /*
10815 : * copy b
10816 : */
10817 0 : brows = optypeb==0 ? k : n;
10818 0 : bcols = optypeb==0 ? n : k;
10819 0 : if( optypeb==0 )
10820 0 : _ialglib_mcopyblock_complex(brows, bcols, _b, 1, _b_stride, b);
10821 0 : if( optypeb==1 )
10822 0 : _ialglib_mcopyblock_complex(brows, bcols, _b, 0, _b_stride, b);
10823 0 : if( optypeb==2 )
10824 0 : _ialglib_mcopyblock_complex(brows, bcols, _b, 3, _b_stride, b);
10825 :
10826 : /*
10827 : * multiply B by A (from the right, by rows)
10828 : * and store result in C
10829 : */
10830 0 : arow = _a;
10831 0 : crow = _c;
10832 0 : for(i=0; i<m; i++)
10833 : {
10834 0 : if( optypea==0 )
10835 : {
10836 0 : _ialglib_vcopy_complex(k, arow, 1, abuf, 1, "No conj");
10837 0 : arow += _a_stride;
10838 : }
10839 0 : else if( optypea==1 )
10840 : {
10841 0 : _ialglib_vcopy_complex(k, arow, _a_stride, abuf, 1, "No conj");
10842 0 : arow++;
10843 : }
10844 : else
10845 : {
10846 0 : _ialglib_vcopy_complex(k, arow, _a_stride, abuf, 1, "Conj");
10847 0 : arow++;
10848 : }
10849 0 : if( beta.x==0 && beta.y==0 )
10850 0 : _ialglib_vzero_complex(n, crow, 1);
10851 0 : cmv(n, k, b, abuf, crow, NULL, 1, alpha, beta);
10852 0 : crow += _c_stride;
10853 : }
10854 0 : return ae_true;
10855 : }
10856 :
10857 :
10858 : /********************************************************************
10859 : complex TRSM kernel
10860 : ********************************************************************/
10861 0 : ae_bool _ialglib_cmatrixrighttrsm(ae_int_t m,
10862 : ae_int_t n,
10863 : ae_complex *_a,
10864 : ae_int_t _a_stride,
10865 : ae_bool isupper,
10866 : ae_bool isunit,
10867 : ae_int_t optype,
10868 : ae_complex *_x,
10869 : ae_int_t _x_stride)
10870 : {
10871 : /*
10872 : * local buffers
10873 : */
10874 : double *pdiag;
10875 : ae_int_t i;
10876 : double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
10877 : double _loc_xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
10878 : double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
10879 0 : double * const abuf = (double*)ae_align(_loc_abuf, alglib_simd_alignment);
10880 0 : double * const xbuf = (double*)ae_align(_loc_xbuf, alglib_simd_alignment);
10881 0 : double * const tmpbuf = (double*)ae_align(_loc_tmpbuf,alglib_simd_alignment);
10882 : ae_bool uppera;
10883 0 : void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;
10884 :
10885 0 : if( m>alglib_c_block || n>alglib_c_block )
10886 0 : return ae_false;
10887 :
10888 : /*
10889 : * Check for SSE2 support
10890 : */
10891 : #ifdef AE_HAS_SSE2_INTRINSICS
10892 : if( ae_cpuid() & CPU_SSE2 )
10893 : {
10894 : cmv = &_ialglib_cmv_sse2;
10895 : }
10896 : #endif
10897 :
10898 : /*
10899 : * Prepare
10900 : */
10901 0 : _ialglib_mcopyblock_complex(n, n, _a, optype, _a_stride, abuf);
10902 0 : _ialglib_mcopyblock_complex(m, n, _x, 0, _x_stride, xbuf);
10903 0 : if( isunit )
10904 0 : for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1))
10905 : {
10906 0 : pdiag[0] = 1.0;
10907 0 : pdiag[1] = 0.0;
10908 : }
10909 0 : if( optype==0 )
10910 0 : uppera = isupper;
10911 : else
10912 0 : uppera = !isupper;
10913 :
10914 : /*
10915 : * Solve Y*A^-1=X where A is upper or lower triangular
10916 : */
10917 0 : if( uppera )
10918 : {
10919 0 : for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1))
10920 : {
10921 : ae_complex tmp_c;
10922 : ae_complex beta;
10923 : ae_complex alpha;
10924 0 : tmp_c.x = pdiag[0];
10925 0 : tmp_c.y = pdiag[1];
10926 0 : beta = ae_c_d_div(1.0, tmp_c);
10927 0 : alpha.x = -beta.x;
10928 0 : alpha.y = -beta.y;
10929 0 : _ialglib_vcopy_dcomplex(i, abuf+2*i, alglib_c_block, tmpbuf, 1, "No conj");
10930 0 : cmv(m, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
10931 : }
10932 0 : _ialglib_mcopyunblock_complex(m, n, xbuf, 0, _x, _x_stride);
10933 : }
10934 : else
10935 : {
10936 0 : for(i=n-1,pdiag=abuf+2*((n-1)*alglib_c_block+(n-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1))
10937 : {
10938 : ae_complex tmp_c;
10939 : ae_complex beta;
10940 : ae_complex alpha;
10941 0 : tmp_c.x = pdiag[0];
10942 0 : tmp_c.y = pdiag[1];
10943 0 : beta = ae_c_d_div(1.0, tmp_c);
10944 0 : alpha.x = -beta.x;
10945 0 : alpha.y = -beta.y;
10946 0 : _ialglib_vcopy_dcomplex(n-1-i, pdiag+2*alglib_c_block, alglib_c_block, tmpbuf, 1, "No conj");
10947 0 : cmv(m, n-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
10948 : }
10949 0 : _ialglib_mcopyunblock_complex(m, n, xbuf, 0, _x, _x_stride);
10950 : }
10951 0 : return ae_true;
10952 : }
10953 :
10954 :
10955 : /********************************************************************
10956 : real TRSM kernel
10957 : ********************************************************************/
10958 0 : ae_bool _ialglib_rmatrixrighttrsm(ae_int_t m,
10959 : ae_int_t n,
10960 : double *_a,
10961 : ae_int_t _a_stride,
10962 : ae_bool isupper,
10963 : ae_bool isunit,
10964 : ae_int_t optype,
10965 : double *_x,
10966 : ae_int_t _x_stride)
10967 : {
10968 : /*
10969 : * local buffers
10970 : */
10971 : double *pdiag;
10972 : ae_int_t i;
10973 : double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
10974 : double _loc_xbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
10975 : double _loc_tmpbuf[alglib_r_block+alglib_simd_alignment];
10976 0 : double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
10977 0 : double * const xbuf = (double *) ae_align(_loc_xbuf, alglib_simd_alignment);
10978 0 : double * const tmpbuf = (double *) ae_align(_loc_tmpbuf,alglib_simd_alignment);
10979 : ae_bool uppera;
10980 0 : void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
10981 0 : void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;
10982 :
10983 0 : if( m>alglib_r_block || n>alglib_r_block )
10984 0 : return ae_false;
10985 :
10986 : /*
10987 : * Check for SSE2 support
10988 : */
10989 : #ifdef AE_HAS_SSE2_INTRINSICS
10990 : if( ae_cpuid() & CPU_SSE2 )
10991 : {
10992 : rmv = &_ialglib_rmv_sse2;
10993 : mcopyblock = &_ialglib_mcopyblock_sse2;
10994 : }
10995 : #endif
10996 :
10997 : /*
10998 : * Prepare
10999 : */
11000 0 : mcopyblock(n, n, _a, optype, _a_stride, abuf);
11001 0 : mcopyblock(m, n, _x, 0, _x_stride, xbuf);
11002 0 : if( isunit )
11003 0 : for(i=0,pdiag=abuf; i<n; i++,pdiag+=alglib_r_block+1)
11004 0 : *pdiag = 1.0;
11005 0 : if( optype==0 )
11006 0 : uppera = isupper;
11007 : else
11008 0 : uppera = !isupper;
11009 :
11010 : /*
11011 : * Solve Y*A^-1=X where A is upper or lower triangular
11012 : */
11013 0 : if( uppera )
11014 : {
11015 0 : for(i=0,pdiag=abuf; i<n; i++,pdiag+=alglib_r_block+1)
11016 : {
11017 0 : double beta = 1.0/(*pdiag);
11018 0 : double alpha = -beta;
11019 0 : _ialglib_vcopy(i, abuf+i, alglib_r_block, tmpbuf, 1);
11020 0 : rmv(m, i, xbuf, tmpbuf, xbuf+i, alglib_r_block, alpha, beta);
11021 : }
11022 0 : _ialglib_mcopyunblock(m, n, xbuf, 0, _x, _x_stride);
11023 : }
11024 : else
11025 : {
11026 0 : for(i=n-1,pdiag=abuf+(n-1)*alglib_r_block+(n-1); i>=0; i--,pdiag-=alglib_r_block+1)
11027 : {
11028 0 : double beta = 1.0/(*pdiag);
11029 0 : double alpha = -beta;
11030 0 : _ialglib_vcopy(n-1-i, pdiag+alglib_r_block, alglib_r_block, tmpbuf+i+1, 1);
11031 0 : rmv(m, n-1-i, xbuf+i+1, tmpbuf+i+1, xbuf+i, alglib_r_block, alpha, beta);
11032 : }
11033 0 : _ialglib_mcopyunblock(m, n, xbuf, 0, _x, _x_stride);
11034 : }
11035 0 : return ae_true;
11036 : }
11037 :
11038 :
11039 : /********************************************************************
11040 : complex TRSM kernel
11041 : ********************************************************************/
11042 0 : ae_bool _ialglib_cmatrixlefttrsm(ae_int_t m,
11043 : ae_int_t n,
11044 : ae_complex *_a,
11045 : ae_int_t _a_stride,
11046 : ae_bool isupper,
11047 : ae_bool isunit,
11048 : ae_int_t optype,
11049 : ae_complex *_x,
11050 : ae_int_t _x_stride)
11051 : {
11052 : /*
11053 : * local buffers
11054 : */
11055 : double *pdiag, *arow;
11056 : ae_int_t i;
11057 : double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
11058 : double _loc_xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
11059 : double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
11060 0 : double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
11061 0 : double * const xbuf = (double *) ae_align(_loc_xbuf, alglib_simd_alignment);
11062 0 : double * const tmpbuf = (double *) ae_align(_loc_tmpbuf,alglib_simd_alignment);
11063 : ae_bool uppera;
11064 0 : void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;
11065 :
11066 0 : if( m>alglib_c_block || n>alglib_c_block )
11067 0 : return ae_false;
11068 :
11069 : /*
11070 : * Check for SSE2 support
11071 : */
11072 : #ifdef AE_HAS_SSE2_INTRINSICS
11073 : if( ae_cpuid() & CPU_SSE2 )
11074 : {
11075 : cmv = &_ialglib_cmv_sse2;
11076 : }
11077 : #endif
11078 :
11079 : /*
11080 : * Prepare
11081 : * Transpose X (so we may use mv, which calculates A*x, but not x*A)
11082 : */
11083 0 : _ialglib_mcopyblock_complex(m, m, _a, optype, _a_stride, abuf);
11084 0 : _ialglib_mcopyblock_complex(m, n, _x, 1, _x_stride, xbuf);
11085 0 : if( isunit )
11086 0 : for(i=0,pdiag=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1))
11087 : {
11088 0 : pdiag[0] = 1.0;
11089 0 : pdiag[1] = 0.0;
11090 : }
11091 0 : if( optype==0 )
11092 0 : uppera = isupper;
11093 : else
11094 0 : uppera = !isupper;
11095 :
11096 : /*
11097 : * Solve A^-1*Y^T=X^T where A is upper or lower triangular
11098 : */
11099 0 : if( uppera )
11100 : {
11101 0 : for(i=m-1,pdiag=abuf+2*((m-1)*alglib_c_block+(m-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1))
11102 : {
11103 : ae_complex tmp_c;
11104 : ae_complex beta;
11105 : ae_complex alpha;
11106 0 : tmp_c.x = pdiag[0];
11107 0 : tmp_c.y = pdiag[1];
11108 0 : beta = ae_c_d_div(1.0, tmp_c);
11109 0 : alpha.x = -beta.x;
11110 0 : alpha.y = -beta.y;
11111 0 : _ialglib_vcopy_dcomplex(m-1-i, pdiag+2, 1, tmpbuf, 1, "No conj");
11112 0 : cmv(n, m-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
11113 : }
11114 0 : _ialglib_mcopyunblock_complex(m, n, xbuf, 1, _x, _x_stride);
11115 : }
11116 : else
11117 0 : { for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1),arow+=2*alglib_c_block)
11118 : {
11119 : ae_complex tmp_c;
11120 : ae_complex beta;
11121 : ae_complex alpha;
11122 0 : tmp_c.x = pdiag[0];
11123 0 : tmp_c.y = pdiag[1];
11124 0 : beta = ae_c_d_div(1.0, tmp_c);
11125 0 : alpha.x = -beta.x;
11126 0 : alpha.y = -beta.y;
11127 0 : _ialglib_vcopy_dcomplex(i, arow, 1, tmpbuf, 1, "No conj");
11128 0 : cmv(n, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
11129 : }
11130 0 : _ialglib_mcopyunblock_complex(m, n, xbuf, 1, _x, _x_stride);
11131 : }
11132 0 : return ae_true;
11133 : }
11134 :
11135 :
11136 : /********************************************************************
11137 : real TRSM kernel
11138 : ********************************************************************/
11139 0 : ae_bool _ialglib_rmatrixlefttrsm(ae_int_t m,
11140 : ae_int_t n,
11141 : double *_a,
11142 : ae_int_t _a_stride,
11143 : ae_bool isupper,
11144 : ae_bool isunit,
11145 : ae_int_t optype,
11146 : double *_x,
11147 : ae_int_t _x_stride)
11148 : {
11149 : /*
11150 : * local buffers
11151 : */
11152 : double *pdiag, *arow;
11153 : ae_int_t i;
11154 : double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
11155 : double _loc_xbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
11156 : double _loc_tmpbuf[alglib_r_block+alglib_simd_alignment];
11157 0 : double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
11158 0 : double * const xbuf = (double *) ae_align(_loc_xbuf, alglib_simd_alignment);
11159 0 : double * const tmpbuf = (double *) ae_align(_loc_tmpbuf,alglib_simd_alignment);
11160 : ae_bool uppera;
11161 0 : void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
11162 0 : void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;
11163 :
11164 0 : if( m>alglib_r_block || n>alglib_r_block )
11165 0 : return ae_false;
11166 :
11167 : /*
11168 : * Check for SSE2 support
11169 : */
11170 : #ifdef AE_HAS_SSE2_INTRINSICS
11171 : if( ae_cpuid() & CPU_SSE2 )
11172 : {
11173 : rmv = &_ialglib_rmv_sse2;
11174 : mcopyblock = &_ialglib_mcopyblock_sse2;
11175 : }
11176 : #endif
11177 :
11178 : /*
11179 : * Prepare
11180 : * Transpose X (so we may use mv, which calculates A*x, but not x*A)
11181 : */
11182 0 : mcopyblock(m, m, _a, optype, _a_stride, abuf);
11183 0 : mcopyblock(m, n, _x, 1, _x_stride, xbuf);
11184 0 : if( isunit )
11185 0 : for(i=0,pdiag=abuf; i<m; i++,pdiag+=alglib_r_block+1)
11186 0 : *pdiag = 1.0;
11187 0 : if( optype==0 )
11188 0 : uppera = isupper;
11189 : else
11190 0 : uppera = !isupper;
11191 :
11192 : /*
11193 : * Solve A^-1*Y^T=X^T where A is upper or lower triangular
11194 : */
11195 0 : if( uppera )
11196 : {
11197 0 : for(i=m-1,pdiag=abuf+(m-1)*alglib_r_block+(m-1); i>=0; i--,pdiag-=alglib_r_block+1)
11198 : {
11199 0 : double beta = 1.0/(*pdiag);
11200 0 : double alpha = -beta;
11201 0 : _ialglib_vcopy(m-1-i, pdiag+1, 1, tmpbuf+i+1, 1);
11202 0 : rmv(n, m-1-i, xbuf+i+1, tmpbuf+i+1, xbuf+i, alglib_r_block, alpha, beta);
11203 : }
11204 0 : _ialglib_mcopyunblock(m, n, xbuf, 1, _x, _x_stride);
11205 : }
11206 : else
11207 0 : { for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=alglib_r_block+1,arow+=alglib_r_block)
11208 : {
11209 0 : double beta = 1.0/(*pdiag);
11210 0 : double alpha = -beta;
11211 0 : _ialglib_vcopy(i, arow, 1, tmpbuf, 1);
11212 0 : rmv(n, i, xbuf, tmpbuf, xbuf+i, alglib_r_block, alpha, beta);
11213 : }
11214 0 : _ialglib_mcopyunblock(m, n, xbuf, 1, _x, _x_stride);
11215 : }
11216 0 : return ae_true;
11217 : }
11218 :
11219 :
11220 : /********************************************************************
11221 : complex SYRK kernel
11222 : ********************************************************************/
11223 0 : ae_bool _ialglib_cmatrixherk(ae_int_t n,
11224 : ae_int_t k,
11225 : double alpha,
11226 : ae_complex *_a,
11227 : ae_int_t _a_stride,
11228 : ae_int_t optypea,
11229 : double beta,
11230 : ae_complex *_c,
11231 : ae_int_t _c_stride,
11232 : ae_bool isupper)
11233 : {
11234 : /*
11235 : * local buffers
11236 : */
11237 : double *arow, *crow;
11238 : ae_complex c_alpha, c_beta;
11239 : ae_int_t i;
11240 : double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
11241 : double _loc_cbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
11242 : double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
11243 0 : double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
11244 0 : double * const cbuf = (double *) ae_align(_loc_cbuf, alglib_simd_alignment);
11245 0 : double * const tmpbuf = (double *) ae_align(_loc_tmpbuf,alglib_simd_alignment);
11246 :
11247 0 : if( n>alglib_c_block || k>alglib_c_block )
11248 0 : return ae_false;
11249 0 : if( n==0 )
11250 0 : return ae_true;
11251 :
11252 : /*
11253 : * copy A and C, task is transformed to "A*A^H"-form.
11254 : * if beta==0, then C is filled by zeros (and not referenced)
11255 : *
11256 : * alpha==0 or k==0 are correctly processed (A is not referenced)
11257 : */
11258 0 : c_alpha.x = alpha;
11259 0 : c_alpha.y = 0;
11260 0 : c_beta.x = beta;
11261 0 : c_beta.y = 0;
11262 0 : if( alpha==0 )
11263 0 : k = 0;
11264 0 : if( k>0 )
11265 : {
11266 0 : if( optypea==0 )
11267 0 : _ialglib_mcopyblock_complex(n, k, _a, 3, _a_stride, abuf);
11268 : else
11269 0 : _ialglib_mcopyblock_complex(k, n, _a, 1, _a_stride, abuf);
11270 : }
11271 0 : _ialglib_mcopyblock_complex(n, n, _c, 0, _c_stride, cbuf);
11272 0 : if( beta==0 )
11273 : {
11274 0 : for(i=0,crow=cbuf; i<n; i++,crow+=2*alglib_c_block)
11275 0 : if( isupper )
11276 0 : _ialglib_vzero(2*(n-i), crow+2*i, 1);
11277 : else
11278 0 : _ialglib_vzero(2*(i+1), crow, 1);
11279 : }
11280 :
11281 :
11282 : /*
11283 : * update C
11284 : */
11285 0 : if( isupper )
11286 : {
11287 0 : for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block)
11288 : {
11289 0 : _ialglib_vcopy_dcomplex(k, arow, 1, tmpbuf, 1, "Conj");
11290 0 : _ialglib_cmv(n-i, k, arow, tmpbuf, NULL, crow+2*i, 1, c_alpha, c_beta);
11291 : }
11292 : }
11293 : else
11294 : {
11295 0 : for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block)
11296 : {
11297 0 : _ialglib_vcopy_dcomplex(k, arow, 1, tmpbuf, 1, "Conj");
11298 0 : _ialglib_cmv(i+1, k, abuf, tmpbuf, NULL, crow, 1, c_alpha, c_beta);
11299 : }
11300 : }
11301 :
11302 : /*
11303 : * copy back
11304 : */
11305 0 : _ialglib_mcopyunblock_complex(n, n, cbuf, 0, _c, _c_stride);
11306 :
11307 0 : return ae_true;
11308 : }
11309 :
11310 :
11311 : /********************************************************************
11312 : real SYRK kernel
11313 : ********************************************************************/
11314 0 : ae_bool _ialglib_rmatrixsyrk(ae_int_t n,
11315 : ae_int_t k,
11316 : double alpha,
11317 : double *_a,
11318 : ae_int_t _a_stride,
11319 : ae_int_t optypea,
11320 : double beta,
11321 : double *_c,
11322 : ae_int_t _c_stride,
11323 : ae_bool isupper)
11324 : {
11325 : /*
11326 : * local buffers
11327 : */
11328 : double *arow, *crow;
11329 : ae_int_t i;
11330 : double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
11331 : double _loc_cbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
11332 0 : double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
11333 0 : double * const cbuf = (double *) ae_align(_loc_cbuf, alglib_simd_alignment);
11334 :
11335 0 : if( n>alglib_r_block || k>alglib_r_block )
11336 0 : return ae_false;
11337 0 : if( n==0 )
11338 0 : return ae_true;
11339 :
11340 : /*
11341 : * copy A and C, task is transformed to "A*A^T"-form.
11342 : * if beta==0, then C is filled by zeros (and not referenced)
11343 : *
11344 : * alpha==0 or k==0 are correctly processed (A is not referenced)
11345 : */
11346 0 : if( alpha==0 )
11347 0 : k = 0;
11348 0 : if( k>0 )
11349 : {
11350 0 : if( optypea==0 )
11351 0 : _ialglib_mcopyblock(n, k, _a, 0, _a_stride, abuf);
11352 : else
11353 0 : _ialglib_mcopyblock(k, n, _a, 1, _a_stride, abuf);
11354 : }
11355 0 : _ialglib_mcopyblock(n, n, _c, 0, _c_stride, cbuf);
11356 0 : if( beta==0 )
11357 : {
11358 0 : for(i=0,crow=cbuf; i<n; i++,crow+=alglib_r_block)
11359 0 : if( isupper )
11360 0 : _ialglib_vzero(n-i, crow+i, 1);
11361 : else
11362 0 : _ialglib_vzero(i+1, crow, 1);
11363 : }
11364 :
11365 :
11366 : /*
11367 : * update C
11368 : */
11369 0 : if( isupper )
11370 : {
11371 0 : for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=alglib_r_block,crow+=alglib_r_block)
11372 : {
11373 0 : _ialglib_rmv(n-i, k, arow, arow, crow+i, 1, alpha, beta);
11374 : }
11375 : }
11376 : else
11377 : {
11378 0 : for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=alglib_r_block,crow+=alglib_r_block)
11379 : {
11380 0 : _ialglib_rmv(i+1, k, abuf, arow, crow, 1, alpha, beta);
11381 : }
11382 : }
11383 :
11384 : /*
11385 : * copy back
11386 : */
11387 0 : _ialglib_mcopyunblock(n, n, cbuf, 0, _c, _c_stride);
11388 :
11389 0 : return ae_true;
11390 : }
11391 :
11392 :
11393 : /********************************************************************
11394 : complex rank-1 kernel
11395 : ********************************************************************/
11396 0 : ae_bool _ialglib_cmatrixrank1(ae_int_t m,
11397 : ae_int_t n,
11398 : ae_complex *_a,
11399 : ae_int_t _a_stride,
11400 : ae_complex *_u,
11401 : ae_complex *_v)
11402 : {
11403 : /*
11404 : * Locals
11405 : */
11406 : ae_complex *arow, *pu, *pv, *vtmp, *dst;
11407 0 : ae_int_t n2 = n/2;
11408 : ae_int_t i, j;
11409 :
11410 : /*
11411 : * Quick exit
11412 : */
11413 0 : if( m<=0 || n<=0 )
11414 0 : return ae_false;
11415 :
11416 :
11417 : /*
11418 : * update pairs of rows
11419 : */
11420 0 : arow = _a;
11421 0 : pu = _u;
11422 0 : vtmp = _v;
11423 0 : for(i=0; i<m; i++, arow+=_a_stride, pu++)
11424 : {
11425 : /*
11426 : * update by two
11427 : */
11428 0 : for(j=0,pv=vtmp, dst=arow; j<n2; j++, dst+=2, pv+=2)
11429 : {
11430 0 : double ux = pu[0].x;
11431 0 : double uy = pu[0].y;
11432 0 : double v0x = pv[0].x;
11433 0 : double v0y = pv[0].y;
11434 0 : double v1x = pv[1].x;
11435 0 : double v1y = pv[1].y;
11436 0 : dst[0].x += ux*v0x-uy*v0y;
11437 0 : dst[0].y += ux*v0y+uy*v0x;
11438 0 : dst[1].x += ux*v1x-uy*v1y;
11439 0 : dst[1].y += ux*v1y+uy*v1x;
11440 : }
11441 :
11442 : /*
11443 : * final update
11444 : */
11445 0 : if( n%2!=0 )
11446 : {
11447 0 : double ux = pu[0].x;
11448 0 : double uy = pu[0].y;
11449 0 : double vx = pv[0].x;
11450 0 : double vy = pv[0].y;
11451 0 : dst[0].x += ux*vx-uy*vy;
11452 0 : dst[0].y += ux*vy+uy*vx;
11453 : }
11454 : }
11455 0 : return ae_true;
11456 : }
11457 :
11458 :
11459 : /********************************************************************
11460 : real rank-1 kernel
11461 : deprecated version
11462 : ********************************************************************/
11463 0 : ae_bool _ialglib_rmatrixrank1(ae_int_t m,
11464 : ae_int_t n,
11465 : double *_a,
11466 : ae_int_t _a_stride,
11467 : double *_u,
11468 : double *_v)
11469 : {
11470 : /*
11471 : * Locals
11472 : */
11473 : double *arow0, *arow1, *pu, *pv, *vtmp, *dst0, *dst1;
11474 0 : ae_int_t m2 = m/2;
11475 0 : ae_int_t n2 = n/2;
11476 0 : ae_int_t stride = _a_stride;
11477 0 : ae_int_t stride2 = 2*_a_stride;
11478 : ae_int_t i, j;
11479 :
11480 : /*
11481 : * Quick exit
11482 : */
11483 0 : if( m<=0 || n<=0 )
11484 0 : return ae_false;
11485 :
11486 : /*
11487 : * update pairs of rows
11488 : */
11489 0 : arow0 = _a;
11490 0 : arow1 = arow0+stride;
11491 0 : pu = _u;
11492 0 : vtmp = _v;
11493 0 : for(i=0; i<m2; i++,arow0+=stride2,arow1+=stride2,pu+=2)
11494 : {
11495 : /*
11496 : * update by two
11497 : */
11498 0 : for(j=0,pv=vtmp, dst0=arow0, dst1=arow1; j<n2; j++, dst0+=2, dst1+=2, pv+=2)
11499 : {
11500 0 : dst0[0] += pu[0]*pv[0];
11501 0 : dst0[1] += pu[0]*pv[1];
11502 0 : dst1[0] += pu[1]*pv[0];
11503 0 : dst1[1] += pu[1]*pv[1];
11504 : }
11505 :
11506 : /*
11507 : * final update
11508 : */
11509 0 : if( n%2!=0 )
11510 : {
11511 0 : dst0[0] += pu[0]*pv[0];
11512 0 : dst1[0] += pu[1]*pv[0];
11513 : }
11514 : }
11515 :
11516 : /*
11517 : * update last row
11518 : */
11519 0 : if( m%2!=0 )
11520 : {
11521 : /*
11522 : * update by two
11523 : */
11524 0 : for(j=0,pv=vtmp, dst0=arow0; j<n2; j++, dst0+=2, pv+=2)
11525 : {
11526 0 : dst0[0] += pu[0]*pv[0];
11527 0 : dst0[1] += pu[0]*pv[1];
11528 : }
11529 :
11530 : /*
11531 : * final update
11532 : */
11533 0 : if( n%2!=0 )
11534 0 : dst0[0] += pu[0]*pv[0];
11535 : }
11536 0 : return ae_true;
11537 : }
11538 :
11539 :
11540 :
11541 : /********************************************************************
11542 : real rank-1 kernel
11543 : deprecated version
11544 : ********************************************************************/
11545 0 : ae_bool _ialglib_rmatrixger(ae_int_t m,
11546 : ae_int_t n,
11547 : double *_a,
11548 : ae_int_t _a_stride,
11549 : double alpha,
11550 : double *_u,
11551 : double *_v)
11552 : {
11553 : /*
11554 : * Locals
11555 : */
11556 : double *arow0, *arow1, *pu, *pv, *vtmp, *dst0, *dst1;
11557 0 : ae_int_t m2 = m/2;
11558 0 : ae_int_t n2 = n/2;
11559 0 : ae_int_t stride = _a_stride;
11560 0 : ae_int_t stride2 = 2*_a_stride;
11561 : ae_int_t i, j;
11562 :
11563 : /*
11564 : * Quick exit
11565 : */
11566 0 : if( m<=0 || n<=0 || alpha==0.0 )
11567 0 : return ae_false;
11568 :
11569 : /*
11570 : * update pairs of rows
11571 : */
11572 0 : arow0 = _a;
11573 0 : arow1 = arow0+stride;
11574 0 : pu = _u;
11575 0 : vtmp = _v;
11576 0 : for(i=0; i<m2; i++,arow0+=stride2,arow1+=stride2,pu+=2)
11577 : {
11578 0 : double au0 = alpha*pu[0];
11579 0 : double au1 = alpha*pu[1];
11580 :
11581 : /*
11582 : * update by two
11583 : */
11584 0 : for(j=0,pv=vtmp, dst0=arow0, dst1=arow1; j<n2; j++, dst0+=2, dst1+=2, pv+=2)
11585 : {
11586 0 : dst0[0] += au0*pv[0];
11587 0 : dst0[1] += au0*pv[1];
11588 0 : dst1[0] += au1*pv[0];
11589 0 : dst1[1] += au1*pv[1];
11590 : }
11591 :
11592 : /*
11593 : * final update
11594 : */
11595 0 : if( n%2!=0 )
11596 : {
11597 0 : dst0[0] += au0*pv[0];
11598 0 : dst1[0] += au1*pv[0];
11599 : }
11600 : }
11601 :
11602 : /*
11603 : * update last row
11604 : */
11605 0 : if( m%2!=0 )
11606 : {
11607 0 : double au0 = alpha*pu[0];
11608 :
11609 : /*
11610 : * update by two
11611 : */
11612 0 : for(j=0,pv=vtmp, dst0=arow0; j<n2; j++, dst0+=2, pv+=2)
11613 : {
11614 0 : dst0[0] += au0*pv[0];
11615 0 : dst0[1] += au0*pv[1];
11616 : }
11617 :
11618 : /*
11619 : * final update
11620 : */
11621 0 : if( n%2!=0 )
11622 0 : dst0[0] += au0*pv[0];
11623 : }
11624 0 : return ae_true;
11625 : }
11626 :
11627 : /********************************************************************
11628 : Interface functions for efficient kernels
11629 : ********************************************************************/
11630 0 : ae_bool _ialglib_i_rmatrixgemmf(ae_int_t m,
11631 : ae_int_t n,
11632 : ae_int_t k,
11633 : double alpha,
11634 : ae_matrix *_a,
11635 : ae_int_t ia,
11636 : ae_int_t ja,
11637 : ae_int_t optypea,
11638 : ae_matrix *_b,
11639 : ae_int_t ib,
11640 : ae_int_t jb,
11641 : ae_int_t optypeb,
11642 : double beta,
11643 : ae_matrix *_c,
11644 : ae_int_t ic,
11645 : ae_int_t jc)
11646 : {
11647 : /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11648 0 : if( alpha==0.0 || k==0 || n==0 || m==0)
11649 0 : return ae_false;
11650 :
11651 : /* handle with optimized ALGLIB kernel */
11652 0 : return _ialglib_rmatrixgemm(m, n, k, alpha, _a->ptr.pp_double[ia]+ja, _a->stride, optypea, _b->ptr.pp_double[ib]+jb, _b->stride, optypeb, beta, _c->ptr.pp_double[ic]+jc, _c->stride);
11653 : }
11654 :
11655 0 : ae_bool _ialglib_i_cmatrixgemmf(ae_int_t m,
11656 : ae_int_t n,
11657 : ae_int_t k,
11658 : ae_complex alpha,
11659 : ae_matrix *_a,
11660 : ae_int_t ia,
11661 : ae_int_t ja,
11662 : ae_int_t optypea,
11663 : ae_matrix *_b,
11664 : ae_int_t ib,
11665 : ae_int_t jb,
11666 : ae_int_t optypeb,
11667 : ae_complex beta,
11668 : ae_matrix *_c,
11669 : ae_int_t ic,
11670 : ae_int_t jc)
11671 : {
11672 : /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11673 0 : if( (alpha.x==0.0 && alpha.y==0) || k==0 || n==0 || m==0 )
11674 0 : return ae_false;
11675 :
11676 : /* handle with optimized ALGLIB kernel */
11677 0 : return _ialglib_cmatrixgemm(m, n, k, alpha, _a->ptr.pp_complex[ia]+ja, _a->stride, optypea, _b->ptr.pp_complex[ib]+jb, _b->stride, optypeb, beta, _c->ptr.pp_complex[ic]+jc, _c->stride);
11678 : }
11679 :
11680 0 : ae_bool _ialglib_i_cmatrixrighttrsmf(ae_int_t m,
11681 : ae_int_t n,
11682 : ae_matrix *a,
11683 : ae_int_t i1,
11684 : ae_int_t j1,
11685 : ae_bool isupper,
11686 : ae_bool isunit,
11687 : ae_int_t optype,
11688 : ae_matrix *x,
11689 : ae_int_t i2,
11690 : ae_int_t j2)
11691 : {
11692 : /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11693 0 : if( m==0 || n==0)
11694 0 : return ae_false;
11695 :
11696 : /* handle with optimized ALGLIB kernel */
11697 0 : return _ialglib_cmatrixrighttrsm(m, n, &a->ptr.pp_complex[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_complex[i2][j2], x->stride);
11698 : }
11699 :
11700 0 : ae_bool _ialglib_i_rmatrixrighttrsmf(ae_int_t m,
11701 : ae_int_t n,
11702 : ae_matrix *a,
11703 : ae_int_t i1,
11704 : ae_int_t j1,
11705 : ae_bool isupper,
11706 : ae_bool isunit,
11707 : ae_int_t optype,
11708 : ae_matrix *x,
11709 : ae_int_t i2,
11710 : ae_int_t j2)
11711 : {
11712 : /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11713 0 : if( m==0 || n==0)
11714 0 : return ae_false;
11715 :
11716 : /* handle with optimized ALGLIB kernel */
11717 0 : return _ialglib_rmatrixrighttrsm(m, n, &a->ptr.pp_double[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_double[i2][j2], x->stride);
11718 : }
11719 :
11720 0 : ae_bool _ialglib_i_cmatrixlefttrsmf(ae_int_t m,
11721 : ae_int_t n,
11722 : ae_matrix *a,
11723 : ae_int_t i1,
11724 : ae_int_t j1,
11725 : ae_bool isupper,
11726 : ae_bool isunit,
11727 : ae_int_t optype,
11728 : ae_matrix *x,
11729 : ae_int_t i2,
11730 : ae_int_t j2)
11731 : {
11732 : /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11733 0 : if( m==0 || n==0)
11734 0 : return ae_false;
11735 :
11736 : /* handle with optimized ALGLIB kernel */
11737 0 : return _ialglib_cmatrixlefttrsm(m, n, &a->ptr.pp_complex[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_complex[i2][j2], x->stride);
11738 : }
11739 :
11740 0 : ae_bool _ialglib_i_rmatrixlefttrsmf(ae_int_t m,
11741 : ae_int_t n,
11742 : ae_matrix *a,
11743 : ae_int_t i1,
11744 : ae_int_t j1,
11745 : ae_bool isupper,
11746 : ae_bool isunit,
11747 : ae_int_t optype,
11748 : ae_matrix *x,
11749 : ae_int_t i2,
11750 : ae_int_t j2)
11751 : {
11752 : /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11753 0 : if( m==0 || n==0)
11754 0 : return ae_false;
11755 :
11756 : /* handle with optimized ALGLIB kernel */
11757 0 : return _ialglib_rmatrixlefttrsm(m, n, &a->ptr.pp_double[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_double[i2][j2], x->stride);
11758 : }
11759 :
11760 0 : ae_bool _ialglib_i_cmatrixherkf(ae_int_t n,
11761 : ae_int_t k,
11762 : double alpha,
11763 : ae_matrix *a,
11764 : ae_int_t ia,
11765 : ae_int_t ja,
11766 : ae_int_t optypea,
11767 : double beta,
11768 : ae_matrix *c,
11769 : ae_int_t ic,
11770 : ae_int_t jc,
11771 : ae_bool isupper)
11772 : {
11773 : /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11774 0 : if( alpha==0.0 || k==0 || n==0)
11775 0 : return ae_false;
11776 :
11777 : /* ALGLIB kernel */
11778 0 : return _ialglib_cmatrixherk(n, k, alpha, &a->ptr.pp_complex[ia][ja], a->stride, optypea, beta, &c->ptr.pp_complex[ic][jc], c->stride, isupper);
11779 : }
11780 :
11781 0 : ae_bool _ialglib_i_rmatrixsyrkf(ae_int_t n,
11782 : ae_int_t k,
11783 : double alpha,
11784 : ae_matrix *a,
11785 : ae_int_t ia,
11786 : ae_int_t ja,
11787 : ae_int_t optypea,
11788 : double beta,
11789 : ae_matrix *c,
11790 : ae_int_t ic,
11791 : ae_int_t jc,
11792 : ae_bool isupper)
11793 : {
11794 : /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11795 0 : if( alpha==0.0 || k==0 || n==0)
11796 0 : return ae_false;
11797 :
11798 : /* ALGLIB kernel */
11799 0 : return _ialglib_rmatrixsyrk(n, k, alpha, &a->ptr.pp_double[ia][ja], a->stride, optypea, beta, &c->ptr.pp_double[ic][jc], c->stride, isupper);
11800 : }
11801 :
11802 0 : ae_bool _ialglib_i_cmatrixrank1f(ae_int_t m,
11803 : ae_int_t n,
11804 : ae_matrix *a,
11805 : ae_int_t ia,
11806 : ae_int_t ja,
11807 : ae_vector *u,
11808 : ae_int_t uoffs,
11809 : ae_vector *v,
11810 : ae_int_t voffs)
11811 : {
11812 0 : return _ialglib_cmatrixrank1(m, n, &a->ptr.pp_complex[ia][ja], a->stride, &u->ptr.p_complex[uoffs], &v->ptr.p_complex[voffs]);
11813 : }
11814 :
11815 0 : ae_bool _ialglib_i_rmatrixrank1f(ae_int_t m,
11816 : ae_int_t n,
11817 : ae_matrix *a,
11818 : ae_int_t ia,
11819 : ae_int_t ja,
11820 : ae_vector *u,
11821 : ae_int_t uoffs,
11822 : ae_vector *v,
11823 : ae_int_t voffs)
11824 : {
11825 0 : return _ialglib_rmatrixrank1(m, n, &a->ptr.pp_double[ia][ja], a->stride, &u->ptr.p_double[uoffs], &v->ptr.p_double[voffs]);
11826 : }
11827 :
11828 0 : ae_bool _ialglib_i_rmatrixgerf(ae_int_t m,
11829 : ae_int_t n,
11830 : ae_matrix *a,
11831 : ae_int_t ia,
11832 : ae_int_t ja,
11833 : double alpha,
11834 : ae_vector *u,
11835 : ae_int_t uoffs,
11836 : ae_vector *v,
11837 : ae_int_t voffs)
11838 : {
11839 0 : return _ialglib_rmatrixger(m, n, &a->ptr.pp_double[ia][ja], a->stride, alpha, &u->ptr.p_double[uoffs], &v->ptr.p_double[voffs]);
11840 : }
11841 :
11842 :
11843 :
11844 :
11845 : /********************************************************************
11846 : This function reads rectangular matrix A given by two column pointers
11847 : col0 and col1 and stride src_stride and moves it into contiguous row-
11848 : by-row storage given by dst.
11849 :
11850 : It can handle following special cases:
11851 : * col1==NULL in this case second column of A is filled by zeros
11852 : ********************************************************************/
11853 0 : void _ialglib_pack_n2(
11854 : double *col0,
11855 : double *col1,
11856 : ae_int_t n,
11857 : ae_int_t src_stride,
11858 : double *dst)
11859 : {
11860 : ae_int_t n2, j, stride2;
11861 :
11862 : /*
11863 : * handle special case
11864 : */
11865 0 : if( col1==NULL )
11866 : {
11867 0 : for(j=0; j<n; j++)
11868 : {
11869 0 : dst[0] = *col0;
11870 0 : dst[1] = 0.0;
11871 0 : col0 += src_stride;
11872 0 : dst += 2;
11873 : }
11874 0 : return;
11875 : }
11876 :
11877 : /*
11878 : * handle general case
11879 : */
11880 0 : n2 = n/2;
11881 0 : stride2 = src_stride*2;
11882 0 : for(j=0; j<n2; j++)
11883 : {
11884 0 : dst[0] = *col0;
11885 0 : dst[1] = *col1;
11886 0 : dst[2] = col0[src_stride];
11887 0 : dst[3] = col1[src_stride];
11888 0 : col0 += stride2;
11889 0 : col1 += stride2;
11890 0 : dst += 4;
11891 : }
11892 0 : if( n%2 )
11893 : {
11894 0 : dst[0] = *col0;
11895 0 : dst[1] = *col1;
11896 : }
11897 : }
11898 :
/*************************************************************************
SSE2 variant of the two-column packing routine.

The strip is described by pointers to the tops of its two columns (col0,
col1) and by src_stride, the distance between consecutive rows. Row r of
the strip is written to dst[2*r] (from col0) and dst[2*r+1] (from col1).

dst must be aligned (aligned SSE stores are used); col0 and col1 may be
non-aligned (unaligned loads are used).

Special cases, checked in this order:
* col1==NULL        second output column is filled with zeros (scalar code)
* src_stride==1     SSE-based code is used
* col1-col0==1      SSE-based code is used
Everything else goes through scalar code.

This function is compiled only when AE_HAS_SSE2_INTRINSICS is defined.
Per the original documentation it may be called only when the result of
ae_cpuid() contains the CPU_SSE2 bit (a run-time check the caller makes).
*************************************************************************/
#if defined(AE_HAS_SSE2_INTRINSICS)
void _ialglib_pack_n2_sse2(
    double *col0,
    double *col1,
    ae_int_t n,
    ae_int_t src_stride,
    double *dst)
{
    ae_int_t row;

    /*
     * single-column source: pad the second column with zeros
     */
    if( col1==NULL )
    {
        for(row=0; row<n; row++)
        {
            dst[2*row]   = col0[row*src_stride];
            dst[2*row+1] = 0.0;
        }
        return;
    }

    /*
     * unit stride: each column is contiguous, so two rows of each
     * column are loaded at once and interleaved with unpack
     */
    if( src_stride==1 )
    {
        __m128d lo, hi;
        for(row=0; row+1<n; row+=2)
        {
            lo = _mm_loadu_pd(col0+row);
            hi = _mm_loadu_pd(col1+row);
            _mm_store_pd(dst+2*row,   _mm_unpacklo_pd(lo,hi));
            _mm_store_pd(dst+2*row+2, _mm_unpackhi_pd(lo,hi));
        }
        if( n%2 )
        {
            dst[2*row]   = col0[row];
            dst[2*row+1] = col1[row];
        }
        return;
    }

    /*
     * adjacent columns: each source row already holds the pair in the
     * right order, so rows are copied as whole 2-element vectors
     */
    if( col1-col0==1 )
    {
        __m128d first, second;
        for(row=0; row+1<n; row+=2)
        {
            first  = _mm_loadu_pd(col0+row*src_stride);
            second = _mm_loadu_pd(col0+(row+1)*src_stride);
            _mm_store_pd(dst+2*row,   first);
            _mm_store_pd(dst+2*row+2, second);
        }
        if( n%2 )
        {
            double *src = col0+row*src_stride;
            dst[2*row]   = src[0];
            dst[2*row+1] = src[1];
        }
        return;
    }

    /*
     * general case: scalar code, two rows per iteration plus the
     * leftover row when n is odd
     */
    for(row=0; row+1<n; row+=2)
    {
        double *out = dst+2*row;
        out[0] = col0[row*src_stride];
        out[1] = col1[row*src_stride];
        out[2] = col0[(row+1)*src_stride];
        out[3] = col1[(row+1)*src_stride];
    }
    if( n%2 )
    {
        dst[2*row]   = col0[row*src_stride];
        dst[2*row+1] = col1[row*src_stride];
    }
}
#endif
12016 :
12017 :
12018 : /********************************************************************
12019 : This function calculates R := alpha*A'*B+beta*R where A and B are Kx2
12020 : matrices stored in contiguous row-by-row storage, R is 2x2 matrix
12021 : stored in non-contiguous row-by-row storage.
12022 :
12023 : A and B must be aligned; R may be non-aligned.
12024 :
12025 : If beta is zero, contents of R is ignored (not multiplied by zero -
12026 : just ignored).
12027 :
12028 : However, when alpha is zero, we still calculate A'*B, which is
12029 : multiplied by zero afterwards.
12030 :
12031 : Function accepts additional parameter store_mode:
12032 : * if 0, full R is stored
12033 : * if 1, only first row of R is stored
12034 : * if 2, only first column of R is stored
12035 : * if 3, only top left element of R is stored
12036 : ********************************************************************/
12037 0 : void _ialglib_mm22(double alpha, const double *a, const double *b, ae_int_t k, double beta, double *r, ae_int_t stride, ae_int_t store_mode)
12038 : {
12039 : double v00, v01, v10, v11;
12040 : ae_int_t t;
12041 0 : v00 = 0.0;
12042 0 : v01 = 0.0;
12043 0 : v10 = 0.0;
12044 0 : v11 = 0.0;
12045 0 : for(t=0; t<k; t++)
12046 : {
12047 0 : v00 += a[0]*b[0];
12048 0 : v01 += a[0]*b[1];
12049 0 : v10 += a[1]*b[0];
12050 0 : v11 += a[1]*b[1];
12051 0 : a+=2;
12052 0 : b+=2;
12053 : }
12054 0 : if( store_mode==0 )
12055 : {
12056 0 : if( beta==0 )
12057 : {
12058 0 : r[0] = alpha*v00;
12059 0 : r[1] = alpha*v01;
12060 0 : r[stride+0] = alpha*v10;
12061 0 : r[stride+1] = alpha*v11;
12062 : }
12063 : else
12064 : {
12065 0 : r[0] = beta*r[0] + alpha*v00;
12066 0 : r[1] = beta*r[1] + alpha*v01;
12067 0 : r[stride+0] = beta*r[stride+0] + alpha*v10;
12068 0 : r[stride+1] = beta*r[stride+1] + alpha*v11;
12069 : }
12070 0 : return;
12071 : }
12072 0 : if( store_mode==1 )
12073 : {
12074 0 : if( beta==0 )
12075 : {
12076 0 : r[0] = alpha*v00;
12077 0 : r[1] = alpha*v01;
12078 : }
12079 : else
12080 : {
12081 0 : r[0] = beta*r[0] + alpha*v00;
12082 0 : r[1] = beta*r[1] + alpha*v01;
12083 : }
12084 0 : return;
12085 : }
12086 0 : if( store_mode==2 )
12087 : {
12088 0 : if( beta==0 )
12089 : {
12090 0 : r[0] =alpha*v00;
12091 0 : r[stride+0] = alpha*v10;
12092 : }
12093 : else
12094 : {
12095 0 : r[0] = beta*r[0] + alpha*v00;
12096 0 : r[stride+0] = beta*r[stride+0] + alpha*v10;
12097 : }
12098 0 : return;
12099 : }
12100 0 : if( store_mode==3 )
12101 : {
12102 0 : if( beta==0 )
12103 : {
12104 0 : r[0] = alpha*v00;
12105 : }
12106 : else
12107 : {
12108 0 : r[0] = beta*r[0] + alpha*v00;
12109 : }
12110 0 : return;
12111 : }
12112 : }
12113 :
12114 :
/********************************************************************
SSE2 version of R := alpha*A'*B + beta*R.

A and B are Kx2 matrices stored contiguously, row after row. R is a
2x2 matrix whose second row starts STRIDE elements after the first one.

A and B are read with aligned loads (_mm_load_pd), so every row of
them must be 16-byte aligned. R is accessed with unaligned or scalar
operations and may be non-aligned.

When beta is zero, the previous contents of R is never read - R is
simply overwritten. When alpha is zero, A'*B is still accumulated and
then multiplied by zero.

STORE_MODE selects which part of R is updated:
* 0 - all four elements
* 1 - first row only
* 2 - first column only
* 3 - top left element only
Any other value leaves R untouched.

The definition is compiled only when AE_HAS_SSE2_INTRINSICS is defined.
At run time the caller has to make sure that CPU supports SSE2 (check
that ae_cpuid() result contains CPU_SSE2) before calling this function.
********************************************************************/
#if defined(AE_HAS_SSE2_INTRINSICS)
void _ialglib_mm22_sse2(double alpha, const double *a, const double *b, ae_int_t k, double beta, double *r, ae_int_t stride, ae_int_t store_mode)
{
    /*
     * Two accumulators hold the 2x2 product:
     *
     *          [ DIAG[0]  ANTI[0] ]
     *   A'*B = [                  ]
     *          [ ANTI[1]  DIAG[1] ]
     *
     * DIAG collects a[t]*b[t] elementwise, ANTI collects a[t] times
     * b[t] with its two lanes swapped.
     */
    __m128d diag = _mm_setzero_pd();
    __m128d anti = _mm_setzero_pd();
    __m128d row_a, row_b, row0, row1, scale;
    const ae_int_t pair_cnt = k/2;
    ae_int_t i;

    /*
     * main loop processes two rows of A/B per iteration
     */
    for(i=0; i<pair_cnt; i++, a+=4, b+=4)
    {
        row_a = _mm_load_pd(a);
        row_b = _mm_load_pd(b);
        diag = _mm_add_pd(_mm_mul_pd(row_a,row_b),diag);
        anti = _mm_add_pd(_mm_mul_pd(row_a,_mm_shuffle_pd(row_b,row_b,1)),anti);

        row_a = _mm_load_pd(a+2);
        row_b = _mm_load_pd(b+2);
        diag = _mm_add_pd(_mm_mul_pd(row_a,row_b),diag);
        anti = _mm_add_pd(_mm_mul_pd(row_a,_mm_shuffle_pd(row_b,row_b,1)),anti);
    }

    /*
     * odd K: one more row is left
     */
    if( k%2 )
    {
        row_a = _mm_load_pd(a);
        row_b = _mm_load_pd(b);
        diag = _mm_add_pd(_mm_mul_pd(row_a,row_b),diag);
        anti = _mm_add_pd(_mm_mul_pd(row_a,_mm_shuffle_pd(row_b,row_b,1)),anti);
    }

    /*
     * row0/row1 are first/second rows of alpha*A'*B
     */
    scale = _mm_load1_pd(&alpha);
    row0 = _mm_mul_pd(_mm_unpacklo_pd(diag,anti),scale);
    row1 = _mm_mul_pd(_mm_unpackhi_pd(anti,diag),scale);

    /*
     * store requested part of the result
     */
    switch( store_mode )
    {
        case 0:
        {
            /* full 2x2 output */
            if( beta==0 )
            {
                _mm_storeu_pd(r,row0);
                _mm_storeu_pd(r+stride,row1);
            }
            else
            {
                const __m128d vbeta = _mm_load1_pd(&beta);
                _mm_storeu_pd(r,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),vbeta),row0));
                _mm_storeu_pd(r+stride,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride),vbeta),row1));
            }
            break;
        }
        case 1:
        {
            /* first row only */
            if( beta==0 )
                _mm_storeu_pd(r,row0);
            else
                _mm_storeu_pd(r,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),_mm_load1_pd(&beta)),row0));
            break;
        }
        case 2:
        {
            /* first column only: low lanes of both rows */
            const double top = _mm_cvtsd_f64(row0);
            const double bottom = _mm_cvtsd_f64(row1);
            if( beta==0 )
            {
                r[0] = top;
                r[stride+0] = bottom;
            }
            else
            {
                r[0] = beta*r[0] + top;
                r[stride+0] = beta*r[stride+0] + bottom;
            }
            break;
        }
        case 3:
        {
            /* top left element only: low lane of the first row */
            const double top = _mm_cvtsd_f64(row0);
            if( beta==0 )
                r[0] = top;
            else
                r[0] = beta*r[0] + top;
            break;
        }
        default:
            break;
    }
}
#endif
12259 :
12260 :
12261 : /*************************************************************************
12262 : This function calculates R := alpha*A'*(B0|B1)+beta*R where A, B0 and B1
12263 : are Kx2 matrices stored in contiguous row-by-row storage, R is 2x4 matrix
12264 : stored in non-contiguous row-by-row storage.
12265 :
12266 : A, B0 and B1 must be aligned; R may be non-aligned.
12267 :
12268 : Note that B0 and B1 are two separate matrices stored in different
12269 : locations.
12270 :
12271 : If beta is zero, contents of R is ignored (not multiplied by zero - just
12272 : ignored).
12273 :
12274 : However, when alpha is zero , we still calculate MM product, which is
12275 : multiplied by zero afterwards.
12276 :
12277 : Unlike mm22 functions, this function does NOT support partial output of R
12278 : - we always store full 2x4 matrix.
12279 : *************************************************************************/
12280 0 : void _ialglib_mm22x2(double alpha, const double *a, const double *b0, const double *b1, ae_int_t k, double beta, double *r, ae_int_t stride)
12281 : {
12282 0 : _ialglib_mm22(alpha, a, b0, k, beta, r, stride, 0);
12283 0 : _ialglib_mm22(alpha, a, b1, k, beta, r+2, stride, 0);
12284 0 : }
12285 :
/*************************************************************************
SSE2 version of R := alpha*A'*(B0|B1) + beta*R.

A, B0 and B1 are Kx2 matrices stored contiguously, row after row; B0 and
B1 are two separate matrices located at different addresses. R is a 2x4
matrix whose second row starts STRIDE elements after the first one.

A, B0 and B1 are read with aligned loads (_mm_load_pd), so every row of
them must be 16-byte aligned. R is accessed with unaligned loads/stores
and may be non-aligned.

When beta is zero, the previous contents of R is never read - R is simply
overwritten. When alpha is zero, the product is still accumulated and
then multiplied by zero.

Unlike mm22 functions, partial output is NOT supported - full 2x4 matrix
is always stored.

The definition is compiled only when AE_HAS_SSE2_INTRINSICS is defined.
At run time the caller has to make sure that CPU supports SSE2 (check
that ae_cpuid() result contains CPU_SSE2) before calling this function.
*************************************************************************/
#if defined(AE_HAS_SSE2_INTRINSICS)
void _ialglib_mm22x2_sse2(double alpha, const double *a, const double *b0, const double *b1, ae_int_t k, double beta, double *r, ae_int_t stride)
{
    /*
     * We calculate product of 2xK matrix A' and Kx4 matrix (B0|B1),
     * result is 2x4. During accumulation V0, V1, V2, V3 store result
     * as follows:
     *
     *     [ V0[0]  V1[1]  V2[0]  V3[1] ]
     * R = [                            ]
     *     [ V1[0]  V0[1]  V3[0]  V2[1] ]
     *
     * VA0 stores current 1x2 row of A, VA1 stores VA0 with its lanes
     * swapped, VB stores current 1x2 row of B0 or B1. Multiplication by
     * VA0 gives "diagonal" products (a0*b0, a1*b1) which go to V0/V2,
     * multiplication by VA1 gives "cross" products (a1*b0, a0*b1) which
     * go to V1/V3.
     */
    __m128d v0, v1, v2, v3, va0, va1, vb;
    __m128d r00, r01, r10, r11, valpha, vbeta;
    ae_int_t t;

    v0 = _mm_setzero_pd();
    v1 = _mm_setzero_pd();
    v2 = _mm_setzero_pd();
    v3 = _mm_setzero_pd();
    for(t=0; t<k; t++)
    {
        /*
         * VA1 MUST be lane-swapped copy of VA0: without the swap V1/V3
         * would merely duplicate V0/V2 and off-diagonal elements of
         * the result would be wrong.
         */
        va0 = _mm_load_pd(a);
        va1 = _mm_shuffle_pd(va0, va0, 1);

        /* left half: A'*B0 */
        vb = _mm_load_pd(b0);
        v0 = _mm_add_pd(v0,_mm_mul_pd(va0,vb));
        v1 = _mm_add_pd(v1,_mm_mul_pd(va1,vb));

        /* right half: A'*B1 */
        vb = _mm_load_pd(b1);
        v2 = _mm_add_pd(v2,_mm_mul_pd(va0,vb));
        v3 = _mm_add_pd(v3,_mm_mul_pd(va1,vb));

        a+=2;
        b0+=2;
        b1+=2;
    }

    /*
     * shuffle V1 and V3 (conversion to more convenient storage format):
     *
     *     [ V0[0]  V1[0]  V2[0]  V3[0] ]
     * R = [                            ]
     *     [ V1[1]  V0[1]  V3[1]  V2[1] ]
     *
     * unpack results to
     *
     * [ r00 r01 ]
     * [ r10 r11 ]
     *
     * where each r?? is a 1x2 piece of the output scaled by alpha
     */
    valpha = _mm_load1_pd(&alpha);
    v1 = _mm_shuffle_pd(v1, v1, 1);
    v3 = _mm_shuffle_pd(v3, v3, 1);
    r00 = _mm_mul_pd(_mm_unpacklo_pd(v0,v1),valpha);
    r10 = _mm_mul_pd(_mm_unpackhi_pd(v1,v0),valpha);
    r01 = _mm_mul_pd(_mm_unpacklo_pd(v2,v3),valpha);
    r11 = _mm_mul_pd(_mm_unpackhi_pd(v3,v2),valpha);

    /*
     * store; with zero beta old contents of R is not read at all
     */
    if( beta==0 )
    {
        _mm_storeu_pd(r,r00);
        _mm_storeu_pd(r+2,r01);
        _mm_storeu_pd(r+stride,r10);
        _mm_storeu_pd(r+stride+2,r11);
    }
    else
    {
        vbeta = _mm_load1_pd(&beta);
        _mm_storeu_pd(r,          _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),vbeta),r00));
        _mm_storeu_pd(r+2,        _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+2),vbeta),r01));
        _mm_storeu_pd(r+stride,   _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride),vbeta),r10));
        _mm_storeu_pd(r+stride+2, _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride+2),vbeta),r11));
    }
}
#endif
12407 :
12408 : }
12409 :
12410 :
12411 : /////////////////////////////////////////////////////////////////////////
12412 : //
12413 : // THIS SECTION CONTAINS PARALLEL SUBROUTINES
12414 : //
12415 : /////////////////////////////////////////////////////////////////////////
12416 : namespace alglib_impl
12417 : {
12418 :
12419 :
12420 : }
12421 :
|