numactl --interleave=all ./testing_sgeqrf -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.1  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
ndevices 3
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_sgeqrf [options] [-h|--help]

ngpu 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   |R - Q^H*A|   |I - Q^H*Q|
===============================================================================
  100   100     ---   (  ---  )      0.82 (   0.00)       ---
 1000  1000     ---   (  ---  )    107.87 (   0.01)       ---
   10    10     ---   (  ---  )      0.01 (   0.00)       ---
   20    20     ---   (  ---  )      0.04 (   0.00)       ---
   30    30     ---   (  ---  )      0.12 (   0.00)       ---
   40    40     ---   (  ---  )      1.03 (   0.00)       ---
   50    50     ---   (  ---  )      1.44 (   0.00)       ---
   60    60     ---   (  ---  )      2.17 (   0.00)       ---
   70    70     ---   (  ---  )      2.50 (   0.00)       ---
   80    80     ---   (  ---  )      1.47 (   0.00)       ---
   90    90     ---   (  ---  )      1.75 (   0.00)       ---
  100   100     ---   (  ---  )      1.50 (   0.00)       ---
  200   200     ---   (  ---  )      6.31 (   0.00)       ---
  300   300     ---   (  ---  )     14.22 (   0.00)       ---
  400   400     ---   (  ---  )     25.10 (   0.00)       ---
  500   500     ---   (  ---  )     37.82 (   0.00)       ---
  600   600     ---   (  ---  )     54.27 (   0.01)       ---
  700   700     ---   (  ---  )     68.37 (   0.01)       ---
  800   800     ---   (  ---  )     84.13 (   0.01)       ---
  900   900     ---   (  ---  )    103.83 (   0.01)       ---
 1000  1000     ---   (  ---  )    119.84 (   0.01)       ---
 2000  2000     ---   (  ---  )    338.30 (   0.03)       ---
 3000  3000     ---   (  ---  )    571.34 (   0.06)       ---
 4000  4000     ---   (  ---  )    729.00 (   0.12)       ---
 5000  5000     ---   (  ---  )    904.31 (   0.18)       ---
 6000  6000     ---   (  ---  )   1031.58 (   0.28)       ---
 7000  7000     ---   (  ---  )   1109.99 (   0.41)       ---
 8000  8000     ---   (  ---  )   1303.37 (   0.52)       ---
 9000  9000     ---   (  ---  )   1431.33 (   0.68)       ---
10000 10000     ---   (  ---  )   1526.08 (   0.87)       ---
12000 12000     ---   (  ---  )   1685.03 (   1.37)       ---
14000 14000     ---   (  ---  )   1767.31 (   2.07)       ---
16000 16000     ---   (  ---  )   1886.81 (   2.89)       ---
18000 18000     ---   (  ---  )   1885.45 (   4.12)       ---
20000 20000     ---   (  ---  )   1989.43 (   5.36)       ---

numactl --interleave=all ./testing_sgeqrf_gpu -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.1  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
ndevices 3
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_sgeqrf_gpu [options] [-h|--help]

version 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)    |b - A*x|
================================================================
  100   100     ---   (  ---  )      0.54 (   0.00)       ---
 1000  1000     ---   (  ---  )     83.00 (   0.02)       ---
   10    10     ---   (  ---  )      0.00 (   0.00)       ---
   20    20     ---   (  ---  )      0.01 (   0.00)       ---
   30    30     ---   (  ---  )      0.03 (   0.00)       ---
   40    40     ---   (  ---  )      0.07 (   0.00)       ---
   50    50     ---   (  ---  )      0.13 (   0.00)       ---
   60    60     ---   (  ---  )      0.20 (   0.00)       ---
   70    70     ---   (  ---  )      0.30 (   0.00)       ---
   80    80     ---   (  ---  )      0.43 (   0.00)       ---
   90    90     ---   (  ---  )      0.57 (   0.00)       ---
  100   100     ---   (  ---  )      0.61 (   0.00)       ---
  200   200     ---   (  ---  )      6.25 (   0.00)       ---
  300   300     ---   (  ---  )     13.31 (   0.00)       ---
  400   400     ---   (  ---  )     21.97 (   0.00)       ---
  500   500     ---   (  ---  )     25.64 (   0.01)       ---
  600   600     ---   (  ---  )     38.07 (   0.01)       ---
  700   700     ---   (  ---  )     48.45 (   0.01)       ---
  800   800     ---   (  ---  )     62.14 (   0.01)       ---
  900   900     ---   (  ---  )     74.98 (   0.01)       ---
 1000  1000     ---   (  ---  )     90.56 (   0.01)       ---
 2000  2000     ---   (  ---  )    257.53 (   0.04)       ---
 3000  3000     ---   (  ---  )    455.14 (   0.08)       ---
 4000  4000     ---   (  ---  )    643.10 (   0.13)       ---
 5000  5000     ---   (  ---  )    846.73 (   0.20)       ---
 6000  6000     ---   (  ---  )    986.20 (   0.29)       ---
 7000  7000     ---   (  ---  )   1062.77 (   0.43)       ---
 8000  8000     ---   (  ---  )   1305.88 (   0.52)       ---
 9000  9000     ---   (  ---  )   1409.01 (   0.69)       ---
10000 10000     ---   (  ---  )   1480.54 (   0.90)       ---
12000 12000     ---   (  ---  )   1568.22 (   1.47)       ---
14000 14000     ---   (  ---  )   1680.74 (   2.18)       ---
16000 16000     ---   (  ---  )   1836.92 (   2.97)       ---
18000 18000     ---   (  ---  )   1877.23 (   4.14)       ---
20000 20000     ---   (  ---  )   1970.03 (   5.41)       ---
