Your idea is a bit unusual. Take for(i=0;i<15;i++){} as an example: if you are sure all 15 iterations must run, their execution order does not matter, and each iteration is slow enough that running them in parallel would significantly improve efficiency, then you can dispatch i=0..14 as 15 tasks and let a thread pool process them.
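Since the original question was about MPI, here is a minimal, hypothetical sketch of the same idea expressed with MPI ranks instead of a thread pool: each rank takes every numprocs-th iteration of the 15, and do_task() is just a placeholder for the slow, order-independent loop body (both names are illustrative, not taken from the question).

#include <mpi.h>
#include <stdio.h>

/* placeholder for the slow, order-independent loop body */
static void do_task(int i) { printf("iteration %d\n", i); }

int main(int argc, char *argv[])
{
    int myid, numprocs, i;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    /* rank r handles iterations r, r+numprocs, r+2*numprocs, ... */
    for (i = myid; i < 15; i += numprocs)
        do_task(i);
    MPI_Finalize();
    return 0;
}

A thread pool (for example via OpenMP or a task queue) applies the same decomposition inside one process; the MPI version simply spreads the 15 tasks across processes instead of threads.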
!
! a cross b.f
!
! Fixed-Format Fortran Source File
! Generated by PGI Visual Fortran(R)
! 2010-12-12 21:58:04
!
!Parallel matrix multiplication: main program
      program cross
      implicit double precision (a-h, o-z)
      include 'mpif.h'
      parameter (nbuffer=128*1024*1024/8)
      dimension buf(nbuffer), buf2(nbuffer)
      double precision time_start, time_end
      external init, check, matmul

      call MPI_Init(ierr)
      call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
      call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr)

      if (myrank.eq.0) then
        print *, 'Enter M, N, L: '
        call flush(6)
        read(*,*) M, N, L
      endif
      call MPI_Bcast(M, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      call MPI_Bcast(N, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      call MPI_Bcast(L, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

      if ( mod(m,nprocs).ne.0 .or. mod(l,nprocs).ne.0 ) then
        if (myrank.eq.0) print *, 'M or L cannot be divided by nprocs!'
        call MPI_Finalize(ierr)
        stop
      endif

      ia   = 1
      ib   = ia  + m/nprocs * n
      ic   = ib  + n * l/nprocs
      iwk  = ic  + m/nprocs * l
      iend = iwk + n * l/nprocs
      if ( iend .gt. nbuffer+1 ) then
        if (myrank.eq.0) print *, 'Insufficient buffer size!'
        call MPI_Finalize(ierr)
        stop
      endif

      call init( m, n, l, myrank, nprocs, buf(ia), buf(ib), buf(ic)
     &         , buf2(ia), buf2(ib), buf2(ic) )

      time_start = MPI_Wtime()
      call matmul( m, n, l, myrank, nprocs, buf2(ia), buf2(ib),
     &             buf2(ic), buf2(iwk) )
      time_end = MPI_Wtime()

      call check( m, n, l, myrank, nprocs, buf2(ia), buf2(ib),
     &            buf2(ic) )

      if ( myrank .eq. 0 ) then
        print *, 'time = ', time_end-time_start
        print *, 'mflops = ', m*(n+n-1.0)*l/(time_end-time_start)*1d-6
      endif
      print *, 'ok'

      call MPI_Finalize(ierr)
      stop
      end
!------------------------------------------------------------------
      subroutine init(m, n, l, myrank, nprocs, a, b, c, a2, b2, c2)
      implicit double precision (a-h, o-z)
      include 'mpif.h'
      dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)
      dimension a2(n, m/nprocs), b2(l/nprocs, n), c2(l, m/nprocs)

      mloc = m/nprocs
      lloc = l/nprocs

! Init a, b
      do j=1, n
        do i=1, mloc
          a(i,j) = i + myrank*mloc
        enddo
      enddo
      do j=1, lloc
        do i=1, n
          b(i,j) = j + myrank*lloc
        enddo
      enddo

! Transpose a, b -> a2, b2
      do j=1, mloc
        do i=1, n
          a2(i,j) = a(j,i)
        enddo
      enddo
      do j=1, n
        do i=1, lloc
          b2(i,j) = b(j,i)
        enddo
      enddo

      return
      end
!------------------------------------------------------------------
      subroutine check(m, n, l, myrank, nprocs, a, b, c)
      implicit double precision (a-h, o-z)
      include 'mpif.h'
      dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)
!dimension a(n,m/nprocs), b(l/nprocs,n), c(l,m/nprocs)
      integer local_code, code

      mloc = m/nprocs
      lloc = l/nprocs

! Check the results
      local_code = 0
      do i=1, l
        do j=1, mloc
          if ( abs(c(i,j) - n*dble(j+myrank*lloc)*i) .gt. 1d-10 ) then
            local_code = 1
            print *, 'local_code=', local_code
            goto 10
          endif
        enddo
      enddo

 10   call MPI_Reduce( local_code, code, 1, MPI_INTEGER, MPI_SUM, 0,
     &                 MPI_COMM_WORLD, ierr)
!
      if ( myrank .eq. 0 ) then
        print *, 'code = ', code
      endif
!
      return
      end
!Parallel multiplication of matrices using MPI_Isend/MPI_Irecv
      subroutine matmul(m, n, l, myrank, nprocs, a, b, c, work)
      implicit double precision (a-h, o-z)
      include 'mpif.h'
      dimension a(n,m/nprocs), b(l/nprocs,n), c(l/nprocs,m),
     &          work(n,m/nprocs)
      integer src, dest, tag
      integer status(MPI_STATUS_SIZE, 2), request(2)

      mloc = m/nprocs
      lloc = l/nprocs

      dest = mod( myrank-1+nprocs, nprocs )
      src  = mod( myrank+1, nprocs )

      jpos = myrank*mloc
      print *, 'myrank=', myrank
c     print *, 'dest=', dest, 'src=', src
c     print *, 'jpos=', jpos, 'tag=', tag

      do ip=1, nprocs - 1
        tag = 10000 + ip
        call MPI_Isend( a, n*mloc, MPI_DOUBLE_PRECISION, dest, tag,
     &                  MPI_COMM_WORLD, request(1), ierr )
        call MPI_Irecv( work, n*mloc, MPI_DOUBLE_PRECISION, src, tag,
     &                  MPI_COMM_WORLD, request(2), ierr )

        do i=1, lloc
          do j=1, mloc
            sum = 0d0
            do k=1, n
              sum = sum + b(i,k) * a(k,j)
            enddo
            c(i, j+jpos) = sum
          enddo
        enddo

        call MPI_Waitall(2, request, status, ierr)

! Copy work -> a (this copy could be avoided by alternating the
! roles of a and work between computation and communication)
        do i=1, n
          do j=1, mloc
            a(i,j) = work(i,j)
          enddo
        enddo

        jpos = jpos + mloc
        if ( jpos .ge. m ) jpos = 0
      enddo

      do i=1, lloc
        do j=1, mloc
          sum = 0d0
          do k=1, n
            sum = sum + b(i,k) * a(k,j)
          enddo
          c(i, j+jpos) = sum
        enddo
      enddo

      print *, 'c(1,mloc)=', c(1,mloc)
      print *, 'c(1,2)=', c(1,2)
      print *, 'c(2,1)=', c(2,1)
      print *, 'c(lloc,1)=', c(lloc,1)

      return
      end
They are simply the program's command-line arguments. For example, run this in cmd:
shutdown /s /t 1
Here "shutdown" is argv[0], "/s" is argv[1], and so on in order.
Write a program and try it if you don't believe me.
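If you do want to try it, here is a minimal C sketch that simply echoes its arguments; nothing in it is MPI-specific, but the same argc/argv pair is what an MPI program later hands to MPI_Init:

#include <stdio.h>

int main(int argc, char *argv[])
{
    int i;
    /* argv[0] is the program name, argv[1..argc-1] are the arguments */
    for (i = 0; i < argc; i++)
        printf("argv[%d] = %s\n", i, argv[i]);
    return 0;
}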
I'll post the answers after my Linux exam tomorrow; please award the 10 points.
(1) MPI_Comm_size(MPI_COMM_WORLD, &numprocs); obtains the total number of processes.
MPI_Comm_rank(MPI_COMM_WORLD, &myid); obtains the rank (process number) of the calling process.
(2) This problem uses block partitioning (a block-partitioning sketch is given after the code below for contrast). Code for cyclic partitioning:
i = myid;
while (i < SIZE)
{
    myresult += data[i];
    i += numprocs;
}
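For contrast, a minimal sketch of block partitioning, reusing the same variable names (myid, numprocs, data, myresult) and assuming, as the Fortran program above does for M and L, that SIZE divides evenly by numprocs:

int chunk = SIZE / numprocs;   /* iterations owned by each process */
int start = myid * chunk;      /* first index owned by this rank   */
for (i = start; i < start + chunk; i++)
    myresult += data[i];       /* each rank sums one contiguous block */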
(3) See pages 83-85 of the textbook.
(4) Characteristics of an SPMD program:
The same program runs in parallel on every processor. It first initializes (reading the data from /$home/data into the data[10] array), then the work is divided among the processes, and finally the results are reduced to the root process, which handles them accordingly (in this problem, by printing).
(5) MPI_Bcast(data, SIZE, MPI_INT, 0, MPI_COMM_WORLD);
broadcasts data from the root to every process.
MPI_Reduce(&myresult, &result, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
reduces (sums) each process's myresult into result on the root process.
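Putting (1), (2), (4) and (5) together, here is a minimal self-contained sketch of the SPMD summation program being described; for illustration the root fills data[] with sample values instead of reading /$home/data, and the loop uses the cyclic partitioning shown in (2):

#include <mpi.h>
#include <stdio.h>
#define SIZE 10

int main(int argc, char *argv[])
{
    int data[SIZE], myid, numprocs, i;
    int myresult = 0, result = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);     /* rank of this process      */
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs); /* total number of processes */

    if (myid == 0)                  /* root initializes the data        */
        for (i = 0; i < SIZE; i++)
            data[i] = i + 1;        /* sample values, not /$home/data   */

    /* broadcast data from the root to every process */
    MPI_Bcast(data, SIZE, MPI_INT, 0, MPI_COMM_WORLD);

    /* cyclic partitioning of the loop, as in (2) */
    for (i = myid; i < SIZE; i += numprocs)
        myresult += data[i];

    /* reduce every myresult into result on the root, then print there */
    MPI_Reduce(&myresult, &result, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (myid == 0)
        printf("result = %d\n", result);

    MPI_Finalize();
    return 0;
}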
That covers the questions discussed above: parallelizing a for loop with MPI, MPI matrix multiplication in C, and what argc and argv are in an MPI program in C++ (argc being the number of arguments and argv the argument array).