
! tune3_blocking
! Build: f95 -O2   mat_tune3_blk.f
!
! Matrix-multiply tuning exercise (fixed-form source).
! Accumulates a = a + b*c for n x n matrices with the j and k loops
! unrolled by 4, times the kernel with etime, and prints the elapsed
! time, the padding parameter "mod", a(n,n) as a check value, and
! the resulting GFlops figure.
! The lines marked "!!" are the commented-out skeleton for adding
! kk/ii blocking (block sizes KB, IB); the "??" placeholders are
! left to be filled in.

      implicit real(8)(a-h,o-z)
! n   : matrix order; the kernel below needs n to be a multiple of 4
! mod : extra rows added to the leading dimension of a, b and c
!       (note: this name hides the MOD intrinsic in this unit)
      parameter ( n=2048 , mod=0 )
!
!     parameter ( KB=128, IB=512 )
!
      real(8) a(n+mod,n),b(n+mod,n)
! NOTE(review): c is single precision while a and b are double;
! confirm this is intended.
      real(4) c(n+mod,n)
      real(4) etime,cp1(2),cp2(2),t1,t2
      real(8) flop,xgflops

! The 4x4 unrolled kernel touches columns j..j+3 and k..k+3, so an
! n that is not a multiple of 4 would index past the array bounds.
! (The MOD intrinsic is unavailable here because of the parameter
! of the same name, hence the integer-division test.)
      if ((n/4)*4 .ne. n) then
        write(6,*) 'error: n must be a multiple of 4'
        stop 1
      endif

! Initialise: a = 0, b(i,j) = c(i,j) = n+1-max(i,j).
      do j = 1,n
        do i = 1,n
          a(i,j) = 0.0d0
          b(i,j) = n+1-max(i,j)
          c(i,j) = n+1-max(i,j)
        enddo
      enddo

      write(6,50) 'matrix_size =',n,' p_name       |'
     &          //'  user(sec)|   mod       check'
   50 format(1x,a,i5/1x,a)

! Start of the timed region.
! NOTE(review): the function result of etime is what gets reported;
! gfortran documents it as user+system time although the column
! header reads "user(sec)" - confirm which is intended.
      t1=etime(cp1)

! Blocking skeleton (disabled): outer loops over k-blocks and
! i-blocks would go here.
!!    do kk=1,n,KB
!!    do ii=1,n,IB
      do j=1,n,4
        do k=1,n,4
!!      do k=??, ?????, 4
          do i=1,n
!!        do i=??, ?????

! Update four columns of a (j..j+3) with four terms of the inner
! product (k..k+3) per pass; i is innermost so a and b are walked
! along their first index.
            a(i,j  )=a(i,j)+b(i,k  )*c(k  ,j)
     &                     +b(i,k+1)*c(k+1,j)
     &                     +b(i,k+2)*c(k+2,j)
     &                     +b(i,k+3)*c(k+3,j)
            a(i,j+1)=a(i,j+1)+b(i,k  )*c(k  ,j+1)
     &                       +b(i,k+1)*c(k+1,j+1)
     &                       +b(i,k+2)*c(k+2,j+1)
     &                       +b(i,k+3)*c(k+3,j+1)
            a(i,j+2)=a(i,j+2)+b(i,k  )*c(k  ,j+2)
     &                       +b(i,k+1)*c(k+1,j+2)
     &                       +b(i,k+2)*c(k+2,j+2)
     &                       +b(i,k+3)*c(k+3,j+2)
            a(i,j+3)=a(i,j+3)+b(i,k  )*c(k  ,j+3)
     &                       +b(i,k+1)*c(k+1,j+3)
     &                       +b(i,k+2)*c(k+2,j+3)
     &                       +b(i,k+3)*c(k+3,j+3)
          enddo
        enddo
      enddo
!!    enddo
!!    enddo

! End of the timed region; t2 becomes the elapsed time.
      t2=etime(cp2)
      t2=t2-t1

! 2*n**3 floating-point operations: one multiply and one add per
! inner-product term.
      flop=dble(n)*dble(n)*dble(n)*2.0d0
! Avoid dividing by zero when the timer difference is zero (e.g.
! a small n and a coarse timer); report 0.0 in that case.
      if (t2 .gt. 0.0) then
        xgflops=flop/t2*1.0d-9
      else
        xgflops=0.0d0
      endif

      write(6,60) ' mat_tune3_blk.f |',t2,mod,a(n,n)
   60 format(1x,a,f10.3,' |',i6,4x,d24.15)
      write(6,70) xgflops,' (GFlops)'
   70 format(f10.3,a)

      stop
      end

