mpi 矩阵相乘 c语言_软件运维

! a cross b.f

! Fixed-Format Fortran Source File

! Generated by PGI Visual Fortran(R)

! 2010-12-12 21:58:04

!Parallel matrix multiplication: main program

program cross

implicit double precision (a-h, o-z)

include 'mpif.h'

parameter (nbuffer=128*1024*1024/8)

dimension buf(nbuffer),buf2(nbuffer)

double precision time_start, time_end

external init, check, matmul

call MPI_Init(ierr)

call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)

call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr)

if (myrank.eq.0) then

print *, 'Enter M, N, L: '

call flush(6)

read(*,*) M, N, L

endif

call MPI_Bcast(M, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

call MPI_Bcast(N, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

call MPI_Bcast(L, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

if ( mod(m,nprocs).ne.0 .or. mod(l,nprocs).ne.0 ) then

if (myrank.eq.0) print *, 'M or L cannot be divided by nprocs!'

call MPI_Finalize(ierr)

stop

endif

ia = 1

ib = ia + m/nprocs ! n

ic = ib + n ! l/nprocs

iwk = ic + m/nprocs ! l

iend = iwk + n ! l/nprocs

if ( iend .gt. nbuffer+1 ) then

if (myrank.eq.0) print *, 'Insufficient buffer size!'

call MPI_Finalize(ierr)

stop

endif

call init( m, n, l, myrank, nprocs, buf(ia), buf(ib), buf(ic)

& , buf2(ia),buf2(ib),buf2(ic) )

time_start = MPI_Wtime()

call matmul( m, n, l, myrank, nprocs, buf2(ia), buf2(ib), buf2(ic)

&, buf2(iwk) )

time_end = MPI_Wtime()

call check( m, n, l, myrank, nprocs, buf2(ia), buf2(ib), buf2(ic))

if ( myrank .eq. 0 ) then

print *, 'time = ', time_end-time_start

print *, 'mflops = ', m*(n+n-1.0)*l/(time_end-time_start)*1d-6

endif

print*,'ok'

call MPI_Finalize(ierr)

stop

end

!------------------------------------------------------------------

subroutine init(m, n, l, myrank, nprocs, a, b, c, a2, b2,c2)

implicit double precision (a-h, o-z)

include 'mpif.h'

dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)

dimension a2(n, m/nprocs), b2(l/nprocs, n), c2(l,m/nprocs)

mloc = m/nprocs

lloc = l/nprocs

! Init. a, b

do j=1, n

do i=1, mloc

a(i,j) = i+myrank*mloc

enddo

do j=1, lloc

do i=1, n

b(i,j) = j+myrank*lloc

enddo

! Tranpose a, b ->a2, b2

do j=1, mloc

do i=1,n

a2(i,j) = a(j,i)

enddo

do j=1, n

do i=1,lloc

b2(i,j) = b(j,i)

enddo

return

end

!------------------------------------------------------------------

subroutine check(m, n, l, myrank, nprocs, a, b, c)

implicit double precision (a-h, o-z)

include 'mpif.h'

dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)

!dimension a(n,m/nprocs), b(l/nprocs,n), c(l,m/nprocs)

integer local_code, code

mloc = m/nprocs

lloc = l/nprocs

!Check the results

local_code = 0

do i=1, l

do j=1, mloc

if ( abs(c(i,j) - n*dble(j+myrank*lloc)*i) .gt. 1d-10 ) then

local_code = 1

print*,'local_code=',local_code

goto 10

endif

enddo

10call MPI_Reduce( local_code, code, 1, MPI_INTEGER, MPI_SUM, 0,

&MPI_COMM_WORLD, ierr)

if ( myrank .eq. 0 ) then

print *, 'code = ', code

endif

return

end

*!Parallel multiplication of matrices using MPI_Isend/MPI_Irecv

subroutine matmul(m, n, l, myrank, nprocs, a, b, c, work)

implicit double precision (a-h, o-z)

include 'mpif.h'

dimension a(n,m/nprocs), b(l/nprocs,n), c(l/nprocs,m),

& work(n,m/nprocs)

integer src, dest, tag

integer status(MPI_STATUS_SIZE, 2), request(2)

mloc = m/nprocs

lloc = l/nprocs

dest = mod( myrank-1+nprocs, nprocs )

src = mod( myrank+1,nprocs )

jpos=myrank*mloc

print*,'myrank=',myrank

c print*,'dest=',dest,'src=',src

c print*,'jpos=',jpos,'tag=',tag

do ip=1, nprocs - 1

tag = 10000 + ip

call MPI_Isend( a, n*mloc, MPI_DOUBLE_PRECISION, dest, tag,

& MPI_COMM_WORLD, request(1), ierr )

call MPI_Irecv( work, n*mloc, MPI_DOUBLE_PRECISION, src, tag,

& MPI_COMM_WORLD, request(2), ierr )

do i=1, lloc

do j=1, mloc

sum=0.d0

do k=1, n

sum = sum + b(i,k) * a(k,j)

enddo

c(i, j+jpos) = sum

enddo

call MPI_Waitall(2, request, status, ierr)

* 拷贝 work ->b (可以通过在计算/通信中交替使用 b/work 来避该免 *** 作)

do i=1, n

do j=1, mloc

a(i,j) = work(i,j)

enddo

jpos = jpos + mloc

if ( jpos .ge. m ) jpos = 0

enddo

do i=1, lloc

do j=1, mloc

sum=0.d0

do k=1, n

sum = sum + b(i,k) * a(k,j)

enddo

c(i, j+jpos) = sum

enddo

print*,'c(1,mloc)=',c(1,mloc)

print*,'c(1,2)=', c(1,2)

print*,'c(2,1)=', c(2,1)

print*,'c(lloc,1)=',c(lloc,1)

return

end

C语言利用数组计算超大整数的阶乘代码

#include <stdio.h>

int main()

{

int n

int a[9000]//确保保存最终运算结果的数组足够大

int digit = 1//位数

int temp //阶乘的任一元素与临时结果的某位的乘积结果

int i, j, carry//carry：进位

printf("please in put n:\n")

scanf("%d",&n)

a[0] = 1 //将结果先初始化为1

for ( i=2i<=ni++ ) //开始阶乘，阶乘元素从2开始依次"登场"

{ //按最基本的乘法运算思想来考虑，将临时结果的每位与阶乘元素相乘

for( j=1, carry=0 j<=digitj++ )

{

temp = a[j-1] * i + carry//相应阶乘中的一项与当前所得临时结果的某位相乘（加上进位）

a[j-1] = temp % 10//更新临时结果的位上信息

carry = temp / 10//看是否有进位

}

while(carry)

{//如果有进位

a[++digit-1] = carry % 10//新加一位，添加信息。位数增1

carry = carry / 10//看还能不能进位

}

printf("n ! = ") //显示结果

for(j = digitj >=1j--)

{

printf("%d",a[j-1])

}

printf("\n")

return 0

}#include <stdio.h> int main(){int n int a[9000]//确保保存最终运算结果的数组足够大 int digit = 1//位数 int temp //阶乘的任一元素与临时结果的某位的乘积结果 int i, j, carry//carry：进位 printf("please in put n:\n") scanf("%d",&n) a[0] = 1 //将结果先初始化为1 for ( i=2i<=ni++ ) //开始阶乘，阶乘元素从2开始依次"登场"{ //按最基本的乘法运算思想来考虑，将临时结果的每位与阶乘元素相乘 for( j=1, carry=0 j<=digitj++ ){temp = a[j-1] * i + carry//相应阶乘中的一项与当前所得临时结果的某位相乘（加上进位） a[j-1] = temp % 10//更新临时结果的位上信息 carry = temp / 10//看是否有进位 }while(carry){//如果有进位 a[++digit-1] = carry % 10//新加一位，添加信息。位数增1carry = carry / 10//看还能不能进位 }}printf("n ! = ") //显示结果for(j = digitj >=1j--){printf("%d",a[j-1]) }printf("\n") return 0 }

可以调用 matrix2（）或 matrix() 做矩阵相乘。

下面主函数以调用 matrix() 为例，介绍过程：

输入参数，动态分配数组，输入矩阵数据，调用，输出，释放内存。

#include<iostream>

using namespace std

#include<stdio.h>

/*---------------------

c[j]][i] = a[j][k] * b[k][i] = c[j][i]

a[ny][nk]

b[nk][nx]

c[ny][nx]

*---------------------*/

void matrix2(int *a,int *b, int *c, int nx, int ny, int nk)

{

int i,j,k

for(j=0j<nyj++){

for(i=0i<nxi++){

c[j*nx+i]=0

for(k=0k<nkk++)c[j*nx+i]+= a[j*nk+k] * b[k*nx+i]

}

/*---------------------

b[j][k] * c[k][i] = a[j][i]

*---------------------*/

void matrix(int **b,int **c, int **a, int nx, int ny, int nk)

{

int i,j,k

for (j=0j<nyj++)for(i=0i<nxi++)a[j][i]= 0

for(j=0j<nyj++){

for(i=0i<nxi++){

for(k=0k<nkk++)a[j][i]+= b[j][k]*c[k][i]

}

int main()

{

int i,j,k,tmp

int b_row,b_col

int c_row,c_col

int a_row,a_col

int **b,**c,**a

printf("please enter b_row b_col of matrix B\n")

scanf("%d %d",&b_row,&b_col)

c_row = b_col

printf("please enter c_col of matrix C\n")

scanf("%d",&c_col)

a_row = b_row

a_col = c_col

a = (int **) malloc(sizeof(int *) * a_row)

for (j=0j<a_rowj++){

a[j] = (int *) malloc(sizeof(int) * a_col)

}

b = (int **) malloc(sizeof(int *) * b_row)

for (j=0j<b_rowj++){

b[j] = (int *) malloc(sizeof(int) * b_col)

}

c = (int **) malloc(sizeof(int *) * c_row)

for (j=0j<c_rowj++){

c[j] = (int *) malloc(sizeof(int) * c_col)

}

if (!c[c_row-1]) {

printf("no enought memory\n")exit(0)

}

printf("Please input int matrix b[%d][%d]\n",b_row,b_col)

for (j=0j<b_rowj++)

for (i=0i<b_coli++){

scanf("%d",&tmp)

b[j][i] = tmp

}

printf("Please input int matrix c[%d][%d]\n",c_row,c_col)

for (j=0j<c_rowj++)

for (i=0i<c_coli++){

scanf("%d",&tmp)

c[j][i] = tmp

}

matrix( b ,c,a, a_col, a_row, b_col)

for(j=0j<a_rowj++)

{

for (i=0i<a_coli++) printf("%d ",a[j][i])

printf("\n")

}

for (i=0i<a_rowi++) free((void *)a[i])free((void *)a)

for (i=0i<b_rowi++) free((void *)b[i])free((void *)b)

for (i=0i<c_rowi++) free((void *)c[i])free((void *)c)

return 0

}

数据和结果例子：

please enter b_row b_col of matrix B

3 2

please enter c_col of matrix C

Please input int matrix b[3][2]

1 2 3 4 5 6

Please input int matrix c[2][3]

1 2 3 4 5 6

9 12 15

19 26 33

29 40 51

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/yw/8099649.html

mpi 矩阵相乘 c语言

发表评论

评论列表（0条）