Linux  R2.6.32-358.18.1.el6.x86_64 FORTRAN90/SX         Rev.482        Wed Jan 14 14:55:25 2015
FILE NAME: mat_tune1.f
PROGRAM NAME: _MAIN
DIAGNOSTIC LIST

  LINE  LEVEL( NO.): DIAGNOSTIC MESSAGE

    11  opt  (1592): Outer loop unrolled inside inner loop.
    12  vec  (   1): Vectorized loop.
    12  vec  (  29): ADB is used for array.: b
    12  vec  (  29): ADB is used for array.: a
    26  opt  (1592): Outer loop unrolled inside inner loop.
    26  vec  (   1): Vectorized loop.
    26  vec  (  25): Work vectors are used. Size=8192byte
    26  vec  (  29): ADB is used for array.: c
    27  vec  (   1): Vectorized loop.
    27  vec  (  29): ADB is used for array.: a
    27  vec  (  29): ADB is used for array.: b
    28  opt  (1222): Procedure "mul" expanded inline.
Linux  R2.6.32-358.18.1.el6.x86_64 FORTRAN90/SX         Rev.482        Wed Jan 14 14:55:25 2015
FILE NAME: mat_tune1.f
PROGRAM NAME: _MAIN
TRANSFORMATION LIST

  LINE                   FORTRAN STATEMENT

     1  ! practice_4 (original)
     2  !
     3  !
     4        implicit real(8)(a-h,o-z)
     5        parameter ( n=1024 , moda=0 )
     6        real(8) a(n+moda,n),b(n+moda,n)
     7        real(4) c(n+moda,n)
     8        real(4) etime,cp1(2),cp2(2),t1,t2
     9        real(8) flop,xgflops
    10  
    11        do j = 1,n
    12          do i = 1,n
    13            a(i,j) = 0.0d0
    14            b(i,j) = n+1-max(i,j)
    15            c(i,j) = n+1-max(i,j)
    16          enddo
    17        enddo
     .        do j = 1, 1024, 4                                                 
     .  !cdir    nodep                                                          
     .  !cdir    on_adb(a,b)                                                    
     .           do i = 1, 1024                                                 
     .              a(i,j) = 0.0000000000000000e+000                            
     .              a(i,j+1) = 0.0000000000000000e+000                          
     .              a(i,j+2) = 0.0000000000000000e+000                          
     .              a(i,j+3) = 0.0000000000000000e+000                          
     .              b(i,j) = dfloat(1025 - max(i,j))                            
     .              b(i,j+1) = dfloat(1025 - max(i,1 + j))                      
     .              b(i,j+2) = dfloat(1025 - max(i,2 + j))                      
     .              b(i,j+3) = dfloat(1025 - max(i,3 + j))                      
     .              c(i,j) = float(1025 - max(i,j))                             
     .              c(i,j+1) = float(1025 - max(i,1 + j))                       
     .              c(i,j+2) = float(1025 - max(i,2 + j))                       
     .              c(i,j+3) = float(1025 - max(i,3 + j))                       
     .           enddo                                                          
     .        enddo                                                             
    18  
    19        write(6,50) 'matrix_size =',n,' p_name       |'
    20       &          //'  user(sec)|   moda       check'
    21     50 format(1x,a,i5/1x,a)
    22  
    23        t1=etime(cp1)
    24  !     call ftrace_region_begin('Main-loop')
    25        do j=1,n
    26          do k=1,n
    27            do i=1,n
    28              call mul(n, moda, i, j, k, a, b, c)
    29            end do
    30          end do
     .        do k = 1, 1024, 4                                                 
     .  !cdir    nodep                                                          
     .  !cdir    on_adb(a,b)                                                    
     .           do i = 1, 1024                                                 
     .              a(i,j) = a(i,j) + b(i,k)*dble(c(k,j)) + b(i,k+1)*dble(c(k+1,
     .       1         j)) + b(i,k+2)*dble(c(k+2,j)) + b(i,k+3)*dble(c(k+3,j))  
     .           enddo                                                          
     .        enddo                                                             
    31        end do
    32  !     call ftrace_region_end('Main-loop')
    33        t2=etime(cp2)
    34        t2=t2-t1
    35  
    36        flop=dble(n)*dble(n)*dble(n)*2.0d0
    37        xgflops=flop/t2*1.0d-9
    38  
    39        write(6,60) ' mat_tune2.f_ |',t2,moda,a(n,n)
    40     60 format(1x,a,f10.3,' |',i6,4x,d24.15)
    41        write(6,70) xgflops,' (GFlops)'
    42     70 format(f10.3,a)
    43  
    44        stop
    45        end
Linux  R2.6.32-358.18.1.el6.x86_64 FORTRAN90/SX         Rev.482        Wed Jan 14 14:55:25 2015
FILE NAME: mat_tune1.f
PROGRAM NAME: _MAIN
FORMAT LIST

  LINE    LOOP      FORTRAN STATEMENT

     1:             ! practice_4 (original)
     2:             !
     3:             !
     4:                   implicit real(8)(a-h,o-z)
     5:                   parameter ( n=1024 , moda=0 )
     6:                   real(8) a(n+moda,n),b(n+moda,n)
     7:                   real(4) c(n+moda,n)
     8:                   real(4) etime,cp1(2),cp2(2),t1,t2
     9:                   real(8) flop,xgflops
    10:             
    11: +------>          do j = 1,n
    12: |V----->            do i = 1,n
    13: ||      A             a(i,j) = 0.0d0
    14: ||      A             b(i,j) = n+1-max(i,j)
    15: ||                    c(i,j) = n+1-max(i,j)
    16: |V-----             enddo
    17: +------           enddo
    18:             
    19:                   write(6,50) 'matrix_size =',n,' p_name       |'
    20:                  &          //'  user(sec)|   moda       check'
    21:                50 format(1x,a,i5/1x,a)
    22:             
    23:                   t1=etime(cp1)
    24:             !     call ftrace_region_begin('Main-loop')
    25: +------>          do j=1,n
    26: |V----->            do k=1,n
    27: ||V---->              do i=1,n
    28: |||     A I             call mul(n, moda, i, j, k, a, b, c)
    29: ||V----               end do
    30: |V-----             end do
    31: +------           end do
    32:             !     call ftrace_region_end('Main-loop')
    33:                   t2=etime(cp2)
    34:                   t2=t2-t1
    35:             
    36:                   flop=dble(n)*dble(n)*dble(n)*2.0d0
    37:                   xgflops=flop/t2*1.0d-9
    38:             
    39:                   write(6,60) ' mat_tune2.f_ |',t2,moda,a(n,n)
    40:                60 format(1x,a,f10.3,' |',i6,4x,d24.15)
    41:                   write(6,70) xgflops,' (GFlops)'
    42:                70 format(f10.3,a)
    43:             
    44:                   stop
    45:                   end
