MPI matrix multiplication in C

!
! a cross b.f
!
! Fixed-Format Fortran Source File
! Generated by PGI Visual Fortran(R)
! 2010-12-12 21:58:04
!
! Parallel matrix multiplication: main program
!
      program cross
      implicit double precision (a-h, o-z)
      include 'mpif.h'

      parameter (nbuffer=128*1024*1024/8)
      dimension buf(nbuffer), buf2(nbuffer)
      double precision time_start, time_end
      external init, check, matmul

      call MPI_Init(ierr)
      call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
      call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr)

      if (myrank.eq.0) then
        print *, 'Enter M, N, L: '
        call flush(6)
        read(*,*) M, N, L
      endif
      call MPI_Bcast(M, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      call MPI_Bcast(N, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      call MPI_Bcast(L, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

      if ( mod(m,nprocs).ne.0 .or. mod(l,nprocs).ne.0 ) then
        if (myrank.eq.0) print *, 'M or L cannot be divided by nprocs!'
        call MPI_Finalize(ierr)
        stop
      endif

! Partition the work arrays inside buf/buf2:
!   a(m/nprocs,n) at ia, b(n,l/nprocs) at ib, c at ic,
!   communication workspace at iwk
      ia   = 1
      ib   = ia  + m/nprocs*n
      ic   = ib  + n*l/nprocs
      iwk  = ic  + m/nprocs*l
      iend = iwk + n*l/nprocs

      if ( iend .gt. nbuffer+1 ) then
        if (myrank.eq.0) print *, 'Insufficient buffer size!'
        call MPI_Finalize(ierr)
        stop
      endif

      call init( m, n, l, myrank, nprocs, buf(ia), buf(ib), buf(ic)
     &         , buf2(ia), buf2(ib), buf2(ic) )

      time_start = MPI_Wtime()
      call matmul( m, n, l, myrank, nprocs, buf2(ia), buf2(ib),
     &             buf2(ic), buf2(iwk) )
      time_end = MPI_Wtime()

      call check( m, n, l, myrank, nprocs, buf2(ia), buf2(ib),
     &            buf2(ic) )

      if ( myrank .eq. 0 ) then
        print *, 'time = ', time_end-time_start
        print *, 'mflops = ', m*(n+n-1.0)*l/(time_end-time_start)*1d-6
      endif

      print *, 'ok'
      call MPI_Finalize(ierr)
      stop
      end

!------------------------------------------------------------------
      subroutine init(m, n, l, myrank, nprocs, a, b, c, a2, b2, c2)
      implicit double precision (a-h, o-z)
      include 'mpif.h'
      dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)
      dimension a2(n, m/nprocs), b2(l/nprocs, n), c2(l, m/nprocs)

      mloc = m/nprocs
      lloc = l/nprocs

! Init. a, b: every element of row i of the global a is i,
! every element of column j of the global b is j
      do j=1, n
        do i=1, mloc
          a(i,j) = i+myrank*mloc
        enddo
      enddo
      do j=1, lloc
        do i=1, n
          b(i,j) = j+myrank*lloc
        enddo
      enddo

! Transpose a, b -> a2, b2 for unit-stride access in matmul
      do j=1, mloc
        do i=1, n
          a2(i,j) = a(j,i)
        enddo
      enddo
      do j=1, n
        do i=1, lloc
          b2(i,j) = b(j,i)
        enddo
      enddo

      return
      end

!------------------------------------------------------------------
! Check the result.  The arrays passed in are the transposed copies,
! laid out exactly as in matmul:
!   a(n,m/nprocs), b(l/nprocs,n), c(l/nprocs,m)
      subroutine check(m, n, l, myrank, nprocs, a, b, c)
      implicit double precision (a-h, o-z)
      include 'mpif.h'
      dimension a(n, m/nprocs), b(l/nprocs, n), c(l/nprocs, m)
      integer local_code, code

      mloc = m/nprocs
      lloc = l/nprocs

! Check the results.  With the test data set up in init, the
! exact result is c(i,j) = n * (i + myrank*lloc) * j.
      local_code = 0
      do j=1, m
      do i=1, lloc
        if ( abs(c(i,j) - n*dble(i+myrank*lloc)*j) .gt. 1d-10 ) then
          local_code = 1
          print *, 'local_code=', local_code
          goto 10
        endif
      enddo
      enddo

   10 call MPI_Reduce( local_code, code, 1, MPI_INTEGER, MPI_SUM, 0,
     &                 MPI_COMM_WORLD, ierr )

      if ( myrank .eq. 0 ) then
        print *, 'code = ', code
      endif

      return
      end

!------------------------------------------------------------------
! Parallel multiplication of matrices using MPI_Isend/MPI_Irecv
      subroutine matmul(m, n, l, myrank, nprocs, a, b, c, work)
      implicit double precision (a-h, o-z)
      include 'mpif.h'
      dimension a(n,m/nprocs), b(l/nprocs,n), c(l/nprocs,m),
     &          work(n,m/nprocs)
      integer src, dest, tag
      integer status(MPI_STATUS_SIZE, 2), request(2)

      mloc = m/nprocs
      lloc = l/nprocs

! Circulate the local block of a around the ring: send to the
! left neighbour, receive from the right neighbour
      dest = mod( myrank-1+nprocs, nprocs )
      src  = mod( myrank+1, nprocs )

! jpos: row offset in the global a of the block currently held
      jpos = myrank*mloc
      print *, 'myrank=', myrank
!     print *, 'dest=', dest, 'src=', src
!     print *, 'jpos=', jpos, 'tag=', tag

      do ip=1, nprocs - 1
        tag = 10000 + ip

! Start the shift of a, then overlap it with the local product
        call MPI_Isend( a, n*mloc, MPI_DOUBLE_PRECISION, dest, tag,
     &                  MPI_COMM_WORLD, request(1), ierr )
        call MPI_Irecv( work, n*mloc, MPI_DOUBLE_PRECISION, src, tag,
     &                  MPI_COMM_WORLD, request(2), ierr )

        do i=1, lloc
          do j=1, mloc
            sum = 0.d0
            do k=1, n
              sum = sum + b(i,k) * a(k,j)
            enddo
            c(i, j+jpos) = sum
          enddo
        enddo

        call MPI_Waitall(2, request, status, ierr)

! Copy work -> a (the copy can be avoided by alternating the
! roles of a and work between the compute/communicate phases)
        do i=1, n
          do j=1, mloc
            a(i,j) = work(i,j)
          enddo
        enddo

        jpos = jpos + mloc
        if ( jpos .ge. m ) jpos = 0
      enddo

! Multiply the last block that arrived
      do i=1, lloc
        do j=1, mloc
          sum = 0.d0
          do k=1, n
            sum = sum + b(i,k) * a(k,j)
          enddo
          c(i, j+jpos) = sum
        enddo
      enddo

      print *, 'c(1,mloc)=', c(1,mloc)
      print *, 'c(1,2)=', c(1,2)
      print *, 'c(2,1)=', c(2,1)
      print *, 'c(lloc,1)=', c(lloc,1)

      return
      end
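The program above can typically be built with an MPI wrapper compiler and launched with the usual job starter, e.g. mpif90 cross.f -o cross followed by mpirun -np 4 ./cross (illustrative commands; names vary between MPI distributions). When prompted, M and L must be divisible by the number of processes, as the code checks.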

Parallel programming paradigms

Peer paradigm: all parts of the program have equal status and run essentially the same code, differing only in the data or objects they process. Master/slave paradigm: the communicating processes stand in a master-worker (dependency) relationship.
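As a minimal illustration (not part of the original example), both paradigms usually come down to branching on the process rank inside one SPMD program:

#include <mpi.h>
#include <stdio.h>

/* Minimal sketch: in peer mode every rank would run the same worker
 * code on its own slice of the data; in master/slave mode rank 0
 * coordinates while the remaining ranks do the work. */
int main(int argc, char *argv[])
{
    int rank, nprocs;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    if (rank == 0)
        printf("master: coordinating %d workers\n", nprocs - 1);
    else
        printf("worker %d: processing my own share\n", rank);

    MPI_Finalize();
    return 0;
}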

Point-to-point communication

Blocking: completion of a blocking send means the data has been copied out of the send buffer, so the send buffer may be reused; completion of a blocking receive means the data has been copied into the receive buffer, so the receiver may use it. Non-blocking: given the necessary hardware support, computation can be overlapped with communication. There are four send modes: standard, buffered, synchronous, and ready.
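A minimal sketch of the non-blocking style (assuming a ring of processes, mirroring the matmul routine above): each rank posts MPI_Isend/MPI_Irecv, is free to compute while the messages are in flight, and only then waits:

#include <mpi.h>
#include <stdio.h>

/* Each rank sends its rank number to the right neighbour and receives
 * from the left one; the transfers proceed while the rank does
 * unrelated computation, until MPI_Waitall. */
int main(int argc, char *argv[])
{
    int rank, nprocs, sendval, recvval;
    MPI_Request req[2];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    int right = (rank + 1) % nprocs;
    int left  = (rank - 1 + nprocs) % nprocs;
    sendval = rank;

    MPI_Isend(&sendval, 1, MPI_INT, right, 0, MPI_COMM_WORLD, &req[0]);
    MPI_Irecv(&recvval, 1, MPI_INT, left,  0, MPI_COMM_WORLD, &req[1]);

    /* ... computation that touches neither sendval nor recvval ... */

    MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
    printf("rank %d received %d from rank %d\n", rank, recvval, left);

    MPI_Finalize();
    return 0;
}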

Collective (group) communication

All processes within a given group take part in a global data-processing and communication operation.

Functions: communication (transferring data within the group), synchronization (bringing all processes to agreement at a particular point), and computation (performing a given operation on the data).

Types:
1) Data movement: broadcast (MPI_Bcast), gather (MPI_Gather), scatter (MPI_Scatter), all-gather (MPI_Allgather), all-to-all (MPI_Alltoall).
2) Aggregation: reduction (MPI_Reduce) combines the data in the input buffers of all processes in the group under a given operation OP and returns the result to the receive buffer of the root process; scan (MPI_Scan) has each process perform the reduction over the processes ranked ahead of it (itself included) and store the result in its own output buffer.
3) Synchronization: a barrier (MPI_Barrier) synchronizes all processes in the communicator; each waits until every process has made its own MPI_Barrier call.
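A minimal sketch touching each of the three types (the buffer contents are arbitrary illustrations):

#include <mpi.h>
#include <stdio.h>

/* Data movement (MPI_Bcast), aggregation (MPI_Reduce, MPI_Scan) and
 * synchronization (MPI_Barrier) in one toy program. */
int main(int argc, char *argv[])
{
    int rank, nprocs, x = 0, total = 0, prefix = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    if (rank == 0) x = 42;
    MPI_Bcast(&x, 1, MPI_INT, 0, MPI_COMM_WORLD);      /* data movement */

    MPI_Reduce(&rank, &total, 1, MPI_INT, MPI_SUM, 0,
               MPI_COMM_WORLD);                        /* reduction */
    MPI_Scan(&rank, &prefix, 1, MPI_INT, MPI_SUM,
             MPI_COMM_WORLD);                          /* prefix reduction */

    MPI_Barrier(MPI_COMM_WORLD);                       /* synchronization */

    printf("rank %d: x=%d, prefix sum of ranks=%d\n", rank, x, prefix);
    if (rank == 0) printf("sum of all ranks = %d\n", total);

    MPI_Finalize();
    return 0;
}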

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

#define SIZE 10

int main(int argc, char *argv[])
{
    int myid, numprocs;
    int data[SIZE], i, x, low, high, myresult, result;
    char fn[255];
    FILE *fp;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    if (myid == 0) {  /* Open input file and initialize data */
        strcpy(fn, getenv("HOME"));
        strcat(fn, "/data");
        if ((fp = fopen(fn, "r")) == NULL) {
            printf("Can't open the input file: %s\n\n", fn);
            exit(1);
        }
        for (i = 0; i < SIZE; i++) fscanf(fp, "%d", &data[i]);
        fclose(fp);
    }

    /* Broadcast data */
    MPI_Bcast(data, SIZE, MPI_INT, 0, MPI_COMM_WORLD);

    /* Add my portion of data */
    x = SIZE / numprocs;
    low = myid * x;
    high = low + x;
    if (myid == numprocs - 1) high = SIZE;
    myresult = 0;
    for (i = low; i < high; i++)
        myresult += data[i];

    /* Compute global sum */
    MPI_Reduce(&myresult, &result, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (myid == 0) printf("The sum is %d.\n", result);

    MPI_Finalize();
    return 0;
}
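To try this example: with a typical MPI installation it can be compiled with the wrapper compiler and launched with the usual job starter, e.g. mpicc sum.c -o sum and then mpirun -np 4 ./sum (file and command names here are illustrative and vary between MPI distributions). Note that rank 0 reads SIZE (10) integers from a file named data in the home directory, so that file must exist before the run.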

