c program mwave2 c c MPI (Massage Passing Interface) cc hwave2.f main program of 2-dimensional wave equtaion c modified leap-frog method 2000.06.17 implicit real*8 (a-h,o-z) c parameter(npe=8) CC MPI START include 'mpif.h' integer istatus(mpi_status_size) c common /para_info/ks,ks1,ke,ke1,kss,irank,isize common /para_info/ks,ks1,kgs,ke,ke1,kge,kss,irank,isize c for mpi_gatherv parameter (npe=2) integer recvcount(npe),displs(npe) CC MPI END !hpf$ processors pe(npe) parameter(nx=100,ny=100) c parameter(nb=3,iip0=8,iiq0=4,iir0=4,last=4) parameter(nb=3,iip0=8,iiq0=1,iir0=1,last=4) parameter(nx1=nx+1,nx2=nx+2,ny1=ny+1,ny2=ny+2) parameter(n1=nx2,n2=n1*ny2,n3=n2*nb,noinp=30) parameter(thx=0.5,tam=0.5,vis0=0.10) c c dimension f(nx2,ny2,nb),u(nx2,ny2,nb) c dimension gf(nx2,ny2,nb),gu(nx2,ny2,nb) c dimension v(nx2,ny2,nb) CC MPI START parameter(nyy=(ny2-1)/npe+1) dimension f(nx2,0:nyy+1,nb),u(nx2,0:nyy+1,nb), 1 v(nx2,0:nyy+1,nb) c for all_gather c dimension fg(nx2,ny2) CC MPI END dimension p(nb),ppin(10) c real*8 zt0,zt1,zt2,zt c !hpf$ distribute f(*,block,*) onto pe !hpf$ distribute u(*,block,*) onto pe !hpf$ distribute v(*,block,*) onto pe !hpf$ shadow f(0,1:1,0) !hpf$ shadow u(0,1:1,0) !hpf$ shadow v(0,1:1,0) !hpf$ asyncid id1 c c equivalence (gf,f),(gu,u) common /blk/f c c ppin xl yl dxl dyl dn dv 7 8 9 10 data ppin/101.0,101.0,10.0,10.0,1.0,0.0, 7, 8, 9, 10/ c CC MPI START call mpi_init(ier) call mpi_comm_rank(mpi_comm_world,irank,ier) call mpi_comm_size(mpi_comm_world,isize,ier) c kk=ny2/isize kmod=mod(ny2,isize) c ks=1 kss=irank*kk+min(kmod,irank) c if (irank.lt.kmod) kk=kk+1 ke=ks+kk-1 kgs=ks+kss kge=ke+kss ks1=ks ke1=ke if (irank.eq.0) ks1=2 if (irank.eq.isize-1) ke1=ke-1 c nword=(ke-ks+1)*nx2 call mpi_gather(nword,1,mpi_integer,recvcount, * 1,mpi_integer,0,mpi_comm_world,ier) displs(1)=0 do i=2,isize displs(i)=displs(i-1)+recvcount(i-1) end do c CC MPI END c xl=ppin(1) yl=ppin(2) dxl=ppin(3) dyl=ppin(4) dn=ppin(5) dv=ppin(6) vis=vis0 c hx=xl/float(nx1) hy=yl/float(ny1) t1=thx*hx dx1=0.5*t1/hx dy1=0.5*t1/hy dx2=vis*0.50*(t1/hx)**2 dy2=vis*0.50*(t1/hy)**2 c c initial parameters c CC MPI START if (irank.eq.0) then write(6,122) nx,ny,last,nx2,ny2 write(6,124) t1,dx1,dy1,dx2,dy2 end if CC MPI END 122 format(1h ,10i8) 124 format(1h ,8(1pe10.2)) c c initial conditions call ainit1(ppin) c c start of calculation call clock(zt0) c do 500 ii=1,last call clock(zt1) do 300 iir=1,iir0 do 200 iiq=1,iiq0 do 100 iip=1,iip0 c c boundary conditions c do 20 m=1,nb c !hpf$ independent,new(j) c do 22 j=2,ny1 CC MPI START do 22 j=ks1,ke1 CC MPI END !hpf$ on home(f(:,j,:)),local(f,j,m) begin f(1,j,m)=f(nx1,j,m) f(nx2,j,m)=f(2,j,m) !hpf$ end on 22 continue c !hpf$ asynchronous(id1),nobuffer begin c f(1:nx2, 1,m) = f(1:nx2,1+ny,m) c f(1:nx2,ny2,m) = f(1:nx2, 2,m) CC MPI START c do m=1,nb if (irank.eq.isize-1) then call mpi_send(f(1,kge-1,m),n1,mpi_real,0, & 110,mpi_comm_world,ier) elseif (irank.eq.0) then call mpi_recv(f(1,kgs,m),n1,mpi_real,isize-1, & 110,mpi_comm_world,istatus,ier) end if c c call mpi_barrier(mpi_comm_world,ier) c if (irank.eq.0) then call mpi_send(f(1,kgs+1,m),n1,mpi_real,isize-1, & 115,mpi_comm_world,ier) elseif (irank.eq.isize-1) then call mpi_recv(f(1,kge,m),n1,mpi_real,0, & 115,mpi_comm_world,istatus,ier) end if c end do call mpi_barrier(mpi_comm_world,ier) CC MPI END !hpf$ end asynchronous !hpf$ asyncwait(id1) c 20 continue c c c case of iip=1 2-step Lax-Wendroff method if(iip.eq.1) then t=0.5*t1 dx=0.5*dx1 dy=0.5*dy1 do 30 m=1,nb !hpf$ reflect f CC MPI START c do m=1,nb c len=nx2*ny2 if (irank.gt.0) then call mpi_send(f(1,ks,m),n1,mpi_real,irank-1, & 100,mpi_comm_world,ier) end if if (irank.lt.isize-1) then call mpi_recv(f(1,ke+1,m),n1,mpi_real,irank+1, & 100,mpi_comm_world,istatus,ier) end if c end do call mpi_barrier(mpi_comm_world,ier) CC MPI END !hpf$ independent,new(i,j) c do 32 j=1,ny1 CC MPI START do 32 j=ks,ke1 CC MPI END !hpf$ on home(u(:,j,:)),local(f,u,i,m) begin do 321 i=1,nx1 u(i,j,m)=0.25*(f(i,j,m)+f(i+1,j,m)+ 1 f(i,j+1,m)+f(i+1,j+1,m)) 321 continue !hpf$ end on 32 continue 30 continue c else t=t1 dx=dx1 dy=dy1 endif c c c first step c c !hpf$ reflect f !hpf$ independent,new(i,j) c do 40 j=1,ny1 CC MPI START do 40 j=ks,ke1 CC MPI END !hpf$ on home(u(:,j,:)),local(f,u,p,i,m,dx,dy) begin do 401 i=1,nx1 c p(1)=0.25*(f(i,j,1)+f(i+1,j,1)+ 1 f(i,j+1,1)+f(i+1,j+1,1)) p(2)=0.25*(f(i,j,2)+f(i+1,j,2)+ 1 f(i,j+1,2)+f(i+1,j+1,2)) p(3)=0.25*(f(i,j,3)+f(i+1,j,3)+ 1 f(i,j+1,3)+f(i+1,j+1,3)) c u(i,j,1)=u(i,j,1) 1 -dx*(f(i+1,j+1,2)+f(i+1,j,2)-f(i,j+1,2)-f(i,j,2)) 2 -dy*(f(i+1,j+1,3)-f(i+1,j,3)+f(i,j+1,3)-f(i,j,3)) u(i,j,2)=u(i,j,2) 1 -dx*(f(i+1,j+1,1)+f(i+1,j,1)-f(i,j+1,1)-f(i,j,1)) u(i,j,3)=u(i,j,3) 1 -dy*(f(i+1,j+1,1)-f(i+1,j,1)+f(i,j+1,1)-f(i,j,1)) c 401 continue !hpf$ end on 40 continue c c c preparation of second step c second step c !hpf$ independent,new(i,j,m) c do 62 j=1,ny2 CC MPI START do 62 j=ks,ke CC MPI END !hpf$ on home(v(:,j,:)),local(f,v,i,m) begin c do 621 m=1,nb do 621 i=1,nx2 v(i,j,m)=f(i,j,m) 621 continue !hpf$ end on 62 continue c !hpf$ reflect u !hpf$ reflect v CC MPI START do m=1,nb c len=nx2*ny2 if (irank.lt.isize-1) then call mpi_send(u(1,ke,m),n1,mpi_real,irank+1, & 110,mpi_comm_world,ier) call mpi_send(v(1,ke,m),n1,mpi_real,irank+1, & 120,mpi_comm_world,ier) end if if (irank.gt.0) then call mpi_recv(u(1,ks-1,m),n1,mpi_real,irank-1, & 110,mpi_comm_world,istatus,ier) call mpi_recv(v(1,ks-1,m),n1,mpi_real,irank-1, & 120,mpi_comm_world,istatus,ier) end if if (irank.gt.0) then call mpi_send(v(1,ks,m),n1,mpi_real,irank-1, & 125,mpi_comm_world,ier) end if if (irank.lt.isize-1) then call mpi_recv(v(1,ke+1,m),n1,mpi_real,irank+1, & 125,mpi_comm_world,istatus,ier) end if end do call mpi_barrier(mpi_comm_world,ier) CC MPI END !hpf$ independent,new(i,j) c do 60 j=2,ny1 CC MPI START do 60 j=ks1,ke1 CC MPI END !hpf$ on home(f(:,j,:)),local(f,u,v,p,i,m,dx,dy, !hpf$* dx1,dy1,dx2,dy2) begin do 601 i=2,nx1 c p(1)=0.25*(u(i,j,1)+u(i-1,j,1)+ 1 u(i,j-1,1)+u(i-1,j-1,1)) p(2)=0.25*(u(i,j,2)+u(i-1,j,2)+ 1 u(i,j-1,2)+u(i-1,j-1,2)) p(3)=0.25*(u(i,j,3)+u(i-1,j,3)+ 1 u(i,j-1,3)+u(i-1,j-1,3)) c f(i,j,1)=f(i,j,1) 1 -dx1*(u(i,j,2)+u(i,j-1,2)-u(i-1,j,2)-u(i-1,j-1,2)) 2 -dy1*(u(i,j,3)-u(i,j-1,3)+u(i-1,j,3)-u(i-1,j-1,3)) 3 +dx2*(v(i-1,j,1)-2.0*v(i,j,1)+v(i+1,j,1)) 4 +dy2*(v(i,j+1,1)-2.0*v(i,j,1)+v(i,j-1,1)) f(i,j,2)=f(i,j,2) 1 -dx1*(u(i,j,1)+u(i,j-1,1)-u(i-1,j,1)-u(i-1,j-1,1)) 2 +dx2*(v(i-1,j,2)-2.0*v(i,j,2)+v(i+1,j,2)) 3 +dy2*(v(i,j+1,2)-2.0*v(i,j,2)+v(i,j-1,2)) f(i,j,3)=f(i,j,3) 1 -dy1*(u(i,j,1)-u(i,j-1,1)+u(i-1,j,1)-u(i-1,j-1,1)) 2 +dx2*(v(i-1,j,3)-2.0*v(i,j,3)+v(i+1,j,3)) 3 +dy2*(v(i,j+1,3)-2.0*v(i,j,3)+v(i,j-1,3)) c 601 continue !hpf$ end on 60 continue c c end of 1 time step advance c 100 continue 200 continue 300 continue c call clock(zt2) zt1=zt1-zt0 zt2=zt2-zt0 zt=zt2-zt1 CC MPI START if (irank.eq.0) * write(6,402) ii,zt0,zt1,zt2,zt CC MPI END 402 format(1h , i6,1pe12.3,3(0pf12.5)) c c write the output data c 500 continue 9 continue c c call mpi_finalize(ierr) stop end subroutine clock(ti) double precision mpi_wtime real*8 ti,ti1 c call gettod(ti1) c call fjhpf_gettod(ti1) c call xclock(ti1,5) ti1=mpi_wtime() c ti=1.0d-6*ti1 ti=ti1 return end subroutine ainit1(ppin) implicit real*8 (a-h,o-z) parameter(npe=2) !hpf$ processors pe(npe) parameter(nx=100,ny=100) parameter(nb=3,iip0=8,iiq0=4,iir0=4,last=4) parameter(nx1=nx+1,nx2=nx+2,ny1=ny+1,ny2=ny+2) parameter(n1=nx2,n2=n1*ny2,n3=n2*nb,noinp=30) c c dimension f(nx2,ny2,nb) CC MPI START parameter(nyy=(ny2-1)/npe+1) dimension f(nx2,0:nyy+1,nb) CC MPI END c dimension gf(nx2,ny2,nb) dimension ppin(10) !hpf$ distribute f(*,block,*) onto pe !hpf$ shadow f(0,1:1,0) c equivalence (gf,f) CC MPI START include 'mpif.h' integer istatus(mpi_status_size) c common /para_info/ks,ks1,ke,ke1,kss,irank,isize common /para_info/ks,ks1,kgs,ke,ke1,kge,kss,irank,isize CC MPI END common /blk/f c c xl=ppin(1) yl=ppin(2) dxl=ppin(3) dyl=ppin(4) dn=ppin(5) dv=ppin(6) c hx=xl/float(nx1) hy=yl/float(ny1) c !hpf$ independent,new(i,j) c do 10 j=1,ny2 CC MPI START do 10 j=ks,ke CC MPI END !hpf$ on home(f(:,j,:)),local(f,i,m,x,y,dn1,dv1,ax1, !hpf$* hx,hy,dn) begin y=0.5*hy*(2*j-ny2-1) do 101 i=1,nx2 c y=0.5*hy*(2*j-ny2-1) x=0.5*hx*(2*i-nx2-1) dn1=0.0 dv1=0.0 ax1=sqrt(x*x+y*y) if(ax1.le.dxl) dn1=dn f(i,j,1)=dn1 f(i,j,2)=0.0 f(i,j,3)=0.0 101 continue !hpf$ end on 10 continue c return end