Parallelizing for Loops with MPI


Your idea makes sense. Take for(i=0;i<15;i++){} as an example: if you are sure all 15 iterations must run, their execution order does not matter, and each iteration is slow enough that parallelism pays off, you can hand out i = 0..14 as 15 independent tasks and let a thread pool (or a set of MPI processes) work through them.
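If the iterations are spread over MPI processes instead of a thread pool, the same reasoning applies: each rank claims a subset of the iteration range. Below is a minimal C sketch of a cyclic split; do_work is a hypothetical stand-in for the real loop body, not something from the original question.

#include <mpi.h>
#include <stdio.h>

/* Hypothetical per-iteration work; replace with the real loop body. */
static void do_work(int i) {
    printf("iteration %d\n", i);
}

int main(int argc, char *argv[]) {
    int rank, nprocs;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    /* Cyclic distribution: rank r handles i = r, r+nprocs, r+2*nprocs, ... */
    for (int i = rank; i < 15; i += nprocs)
        do_work(i);

    MPI_Finalize();
    return 0;
}

With 4 processes, rank 0 handles i = 0, 4, 8, 12, rank 1 handles i = 1, 5, 9, 13, and so on; a block split (one contiguous chunk per rank) works just as well when all iterations cost roughly the same.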

!
! a cross b.f
!
! Fixed-Format Fortran Source File
! Generated by PGI Visual Fortran(R)
! 2010-12-12 21:58:04
!

! Parallel matrix multiplication: main program
      program cross
      implicit double precision (a-h, o-z)
      include 'mpif.h'
      parameter (nbuffer=128*1024*1024/8)
      dimension buf(nbuffer), buf2(nbuffer)
      double precision time_start, time_end
      external init, check, matmul

      call MPI_Init(ierr)
      call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
      call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr)

! Rank 0 reads the matrix sizes and broadcasts them to every process
      if (myrank.eq.0) then
        print *, 'Enter M, N, L: '
        call flush(6)
        read(*,*) M, N, L
      endif
      call MPI_Bcast(M, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      call MPI_Bcast(N, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      call MPI_Bcast(L, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

      if ( mod(m,nprocs).ne.0 .or. mod(l,nprocs).ne.0 ) then
        if (myrank.eq.0) print *, 'M or L cannot be divided by nprocs!'
        call MPI_Finalize(ierr)
        stop
      endif

! Offsets of a, b, c and the work array inside the two big buffers
      ia   = 1
      ib   = ia  + m/nprocs * n
      ic   = ib  + n * l/nprocs
      iwk  = ic  + m/nprocs * l
      iend = iwk + n * l/nprocs
      if ( iend .gt. nbuffer+1 ) then
        if (myrank.eq.0) print *, 'Insufficient buffer size!'
        call MPI_Finalize(ierr)
        stop
      endif

      call init( m, n, l, myrank, nprocs, buf(ia), buf(ib), buf(ic)
     &         , buf2(ia), buf2(ib), buf2(ic) )

      time_start = MPI_Wtime()
      call matmul( m, n, l, myrank, nprocs, buf2(ia), buf2(ib),
     &             buf2(ic), buf2(iwk) )
      time_end = MPI_Wtime()

      call check( m, n, l, myrank, nprocs, buf2(ia), buf2(ib),
     &            buf2(ic) )

      if ( myrank .eq. 0 ) then
        print *, 'time = ', time_end-time_start
        print *, 'mflops = ', m*(n+n-1.0)*l/(time_end-time_start)*1d-6
      endif
      print *, 'ok'

      call MPI_Finalize(ierr)
      stop
      end

!------------------------------------------------------------------

      subroutine init(m, n, l, myrank, nprocs, a, b, c, a2, b2, c2)
      implicit double precision (a-h, o-z)
      include 'mpif.h'
      dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)
      dimension a2(n, m/nprocs), b2(l/nprocs, n), c2(l, m/nprocs)

      mloc = m/nprocs
      lloc = l/nprocs

! Init a, b
      do j=1, n
        do i=1, mloc
          a(i,j) = i+myrank*mloc
        enddo
      enddo
      do j=1, lloc
        do i=1, n
          b(i,j) = j+myrank*lloc
        enddo
      enddo

! Transpose a, b -> a2, b2
      do j=1, mloc
        do i=1, n
          a2(i,j) = a(j,i)
        enddo
      enddo
      do j=1, n
        do i=1, lloc
          b2(i,j) = b(j,i)
        enddo
      enddo

      return
      end

!------------------------------------------------------------------

      subroutine check(m, n, l, myrank, nprocs, a, b, c)
      implicit double precision (a-h, o-z)
      include 'mpif.h'
      dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)
!     dimension a(n,m/nprocs), b(l/nprocs,n), c(l,m/nprocs)
      integer local_code, code

      mloc = m/nprocs
      lloc = l/nprocs

! Check the results
      local_code = 0
      do i=1, l
        do j=1, mloc
          if ( abs(c(i,j) - n*dble(j+myrank*lloc)*i) .gt. 1d-10 ) then
            local_code = 1
            print *, 'local_code=', local_code
            goto 10
          endif
        enddo
      enddo

   10 call MPI_Reduce( local_code, code, 1, MPI_INTEGER, MPI_SUM, 0,
     &                 MPI_COMM_WORLD, ierr )

      if ( myrank .eq. 0 ) then
        print *, 'code = ', code
      endif

      return
      end

! Parallel multiplication of matrices using MPI_Isend/MPI_Irecv
      subroutine matmul(m, n, l, myrank, nprocs, a, b, c, work)
      implicit double precision (a-h, o-z)
      include 'mpif.h'
      dimension a(n,m/nprocs), b(l/nprocs,n), c(l/nprocs,m),
     &          work(n,m/nprocs)
      integer src, dest, tag
      integer status(MPI_STATUS_SIZE, 2), request(2)

      mloc = m/nprocs
      lloc = l/nprocs

! Ring communication: send the local block of a to the left neighbour
! and receive the next block from the right neighbour
      dest = mod( myrank-1+nprocs, nprocs )
      src  = mod( myrank+1, nprocs )

      jpos = myrank*mloc
      print *, 'myrank=', myrank
c     print *, 'dest=', dest, ' src=', src
c     print *, 'jpos=', jpos, ' tag=', tag

      do ip=1, nprocs - 1
        tag = 10000 + ip
        call MPI_Isend( a, n*mloc, MPI_DOUBLE_PRECISION, dest, tag,
     &                  MPI_COMM_WORLD, request(1), ierr )
        call MPI_Irecv( work, n*mloc, MPI_DOUBLE_PRECISION, src, tag,
     &                  MPI_COMM_WORLD, request(2), ierr )

! Multiply the block currently held in a while the transfer is in flight
        do i=1, lloc
          do j=1, mloc
            sum = 0d0
            do k=1, n
              sum = sum + b(i,k) * a(k,j)
            enddo
            c(i, j+jpos) = sum
          enddo
        enddo

        call MPI_Waitall(2, request, status, ierr)

! Copy work -> a (could be avoided by alternating the roles of a and
! work between computation and communication)
        do i=1, n
          do j=1, mloc
            a(i,j) = work(i,j)
          enddo
        enddo

        jpos = jpos + mloc
        if ( jpos .ge. m ) jpos = 0
      enddo

! Multiply the last block
      do i=1, lloc
        do j=1, mloc
          sum = 0d0
          do k=1, n
            sum = sum + b(i,k) * a(k,j)
          enddo
          c(i, j+jpos) = sum
        enddo
      enddo

      print *, 'c(1,mloc)=', c(1,mloc)
      print *, 'c(1,2)=', c(1,2)
      print *, 'c(2,1)=', c(2,1)
      print *, 'c(lloc,1)=', c(lloc,1)

      return
      end

Those are the program's command-line arguments. For example, if you run the following in cmd:

shutdown /s /t 1

then shutdown is argv[0], /s is argv[1], and so on down the line.

If you don't believe it, write a small program and try it; a sketch follows below.
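As a quick check, here is a minimal C program (not from the original answer) that just prints its arguments:

#include <stdio.h>

int main(int argc, char *argv[]) {
    /* argc is the argument count, argv[i] the i-th argument string. */
    for (int i = 0; i < argc; i++)
        printf("argv[%d] = %s\n", i, argv[i]);
    return 0;
}

Running it as, say, myprog /s /t 1 (myprog being whatever you name the executable) prints argv[0] = myprog, argv[1] = /s, argv[2] = /t, argv[3] = 1. In an MPI program you pass &argc and &argv to MPI_Init so the MPI launcher can strip its own options before your code inspects the rest.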

(I'll post the full answers after tomorrow's Linux exam; please award the 10 points.)

(1) MPI_Comm_size(MPI_COMM_WORLD, &numprocs); obtains the number of processes.

MPI_Comm_rank(MPI_COMM_WORLD, &myid); obtains the rank (ID) of the calling process.

(2) This problem uses block partitioning. Cyclic partitioning code, for comparison, looks like this (a block-partitioning sketch follows the snippet):

i = myid;
while (i < SIZE) {
    myresult += data[i];
    i += numprocs;
}
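A block-partitioned version of the same summation could look like the sketch below. block_sum is a hypothetical helper, not from the text; it assumes size is divisible by numprocs (otherwise the last rank must also take the remainder) and reuses the data/myid/numprocs/myresult names from the snippets above.

/* Block partitioning: rank myid sums one contiguous chunk of data[]. */
int block_sum(const int *data, int size, int myid, int numprocs) {
    int chunk = size / numprocs;   /* elements owned by each process  */
    int start = myid * chunk;      /* first index owned by this rank  */
    int myresult = 0;
    for (int i = start; i < start + chunk; i++)
        myresult += data[i];
    return myresult;
}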

(3) See pages 83-85 of the textbook.

(4) Characteristics of an SPMD program: the same program runs in parallel on every processor at the same time. It first initializes (reads the data from /$home/data and stores it in the data[10] array), then distributes the work among the processes, and finally reduces the partial results to the root process, which handles them there (in this problem, by printing). A minimal end-to-end sketch is given after item (5).

(5) MPI_Bcast(data, SIZE, MPI_INT, 0, MPI_COMM_WORLD);

broadcasts the contents of data to every process.

MPI_Reduce(&myresult, &result, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

reduces each process's myresult into result on the root process (summing them, because of MPI_SUM).
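Putting items (1) to (5) together, a minimal end-to-end sketch of the SPMD sum could look like the following. The data values are stubbed in with i+1 on the root process, since the format of /$home/data is not given in the question.

#include <mpi.h>
#include <stdio.h>

#define SIZE 10

int main(int argc, char *argv[]) {
    int data[SIZE], myid, numprocs, myresult = 0, result = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);   /* number of processes  */
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);       /* rank of this process */

    if (myid == 0) {
        /* In the assignment, root would read these values from /$home/data. */
        for (int i = 0; i < SIZE; i++)
            data[i] = i + 1;
    }

    /* Broadcast the data to every process. */
    MPI_Bcast(data, SIZE, MPI_INT, 0, MPI_COMM_WORLD);

    /* Cyclic partition of the loop, as in item (2). */
    for (int i = myid; i < SIZE; i += numprocs)
        myresult += data[i];

    /* Reduce the partial sums into result on the root process. */
    MPI_Reduce(&myresult, &result, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    if (myid == 0)
        printf("sum = %d\n", result);

    MPI_Finalize();
    return 0;
}

Run it with, for example, mpirun -np 4 ./a.out; every rank executes the same code, which is exactly the SPMD pattern described in item (4).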

That is everything above on parallel for loops with MPI, covering: parallelizing for loops with MPI, MPI matrix multiplication, and what argc and argv are in a C/C++ MPI program (argc is the argument count and argv is the array of argument strings).

