! a cross b.f
!
! Fixed-Format Fortran Source File
! Generated by PGI Visual Fortran(R)
! 2010-12-12 21:58:04
!
!Parallel matrix multiplication: main program
program cross
implicit double precision (a-h, o-z)
include 'mpif.h'
parameter (nbuffer=128*1024*1024/8)
dimension buf(nbuffer),buf2(nbuffer)
double precision time_start, time_end
external init, check, matmul
call MPI_Init(ierr)
call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr)
if (myrank.eq.0) then
print *, 'Enter M, N, L: '
call flush(6)
read(*,*) M, N, L
endif
call MPI_Bcast(M, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
call MPI_Bcast(N, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
call MPI_Bcast(L, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
if ( mod(m,nprocs).ne.0 .or. mod(l,nprocs).ne.0 ) then
if (myrank.eq.0) print *, 'M or L cannot be divided by nprocs!'
call MPI_Finalize(ierr)
stop
endif
ia = 1
ib = ia + m/nprocs ! n
ic = ib + n ! l/nprocs
iwk = ic + m/nprocs ! l
iend = iwk + n ! l/nprocs
if ( iend .gt. nbuffer+1 ) then
if (myrank.eq.0) print *, 'Insufficient buffer size!'
call MPI_Finalize(ierr)
stop
endif
call init( m, n, l, myrank, nprocs, buf(ia), buf(ib), buf(ic)
& , buf2(ia),buf2(ib),buf2(ic) )
time_start = MPI_Wtime()
call matmul( m, n, l, myrank, nprocs, buf2(ia), buf2(ib), buf2(ic)
&, buf2(iwk) )
time_end = MPI_Wtime()
call check( m, n, l, myrank, nprocs, buf2(ia), buf2(ib), buf2(ic))
if ( myrank .eq. 0 ) then
print *, 'time = ', time_end-time_start
print *, 'mflops = ', m*(n+n-1.0)*l/(time_end-time_start)*1d-6
endif
print*,'ok'
call MPI_Finalize(ierr)
stop
end
!------------------------------------------------------------------
subroutine init(m, n, l, myrank, nprocs, a, b, c, a2, b2,c2)
implicit double precision (a-h, o-z)
include 'mpif.h'
dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)
dimension a2(n, m/nprocs), b2(l/nprocs, n), c2(l,m/nprocs)
mloc = m/nprocs
lloc = l/nprocs
! Init. a, b
do j=1, n
do i=1, mloc
a(i,j) = i+myrank*mloc
enddo
enddo
do j=1, lloc
do i=1, n
b(i,j) = j+myrank*lloc
enddo
enddo
! Tranpose a, b ->a2, b2
do j=1, mloc
do i=1,n
a2(i,j) = a(j,i)
enddo
enddo
do j=1, n
do i=1,lloc
b2(i,j) = b(j,i)
enddo
enddo
return
end
!------------------------------------------------------------------
subroutine check(m, n, l, myrank, nprocs, a, b, c)
implicit double precision (a-h, o-z)
include 'mpif.h'
dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)
!dimension a(n,m/nprocs), b(l/nprocs,n), c(l,m/nprocs)
integer local_code, code
mloc = m/nprocs
lloc = l/nprocs
!Check the results
local_code = 0
do i=1, l
do j=1, mloc
if ( abs(c(i,j) - n*dble(j+myrank*lloc)*i) .gt. 1d-10 ) then
local_code = 1
print*,'local_code=',local_code
goto 10
endif
enddo
enddo
10call MPI_Reduce( local_code, code, 1, MPI_INTEGER, MPI_SUM, 0,
&MPI_COMM_WORLD, ierr)
!
if ( myrank .eq. 0 ) then
print *, 'code = ', code
endif
!
return
end
*!Parallel multiplication of matrices using MPI_Isend/MPI_Irecv
*
subroutine matmul(m, n, l, myrank, nprocs, a, b, c, work)
implicit double precision (a-h, o-z)
include 'mpif.h'
dimension a(n,m/nprocs), b(l/nprocs,n), c(l/nprocs,m),
& work(n,m/nprocs)
integer src, dest, tag
integer status(MPI_STATUS_SIZE, 2), request(2)
*
mloc = m/nprocs
lloc = l/nprocs
*
dest = mod( myrank-1+nprocs, nprocs )
src = mod( myrank+1,nprocs )
*
jpos=myrank*mloc
print*,'myrank=',myrank
c print*,'dest=',dest,'src=',src
c print*,'jpos=',jpos,'tag=',tag
*
do ip=1, nprocs - 1
tag = 10000 + ip
*
call MPI_Isend( a, n*mloc, MPI_DOUBLE_PRECISION, dest, tag,
& MPI_COMM_WORLD, request(1), ierr )
call MPI_Irecv( work, n*mloc, MPI_DOUBLE_PRECISION, src, tag,
& MPI_COMM_WORLD, request(2), ierr )
*
do i=1, lloc
do j=1, mloc
sum=0.d0
do k=1, n
sum = sum + b(i,k) * a(k,j)
enddo
c(i, j+jpos) = sum
enddo
enddo
*
call MPI_Waitall(2, request, status, ierr)
*
* 拷贝 work ->b (可以通过在计算/通信中交替使用 b/work 来避该免 *** 作)
do i=1, n
do j=1, mloc
a(i,j) = work(i,j)
enddo
enddo
*
jpos = jpos + mloc
if ( jpos .ge. m ) jpos = 0
*
enddo
*
do i=1, lloc
do j=1, mloc
sum=0.d0
do k=1, n
sum = sum + b(i,k) * a(k,j)
enddo
c(i, j+jpos) = sum
enddo
enddo
*
print*,'c(1,mloc)=',c(1,mloc)
print*,'c(1,2)=', c(1,2)
print*,'c(2,1)=', c(2,1)
print*,'c(lloc,1)=',c(lloc,1)
return
end
C语言利用数组计算超大整数的阶乘代码#include <stdio.h>
int main()
{
int n
int a[9000]//确保保存最终运算结果的数组足够大
int digit = 1//位数
int temp //阶乘的任一元素与临时结果的某位的乘积结果
int i, j, carry//carry:进位
printf("please in put n:\n")
scanf("%d",&n)
a[0] = 1 //将结果先初始化为1
for ( i=2i<=ni++ ) //开始阶乘,阶乘元素从2开始依次"登场"
{ //按最基本的乘法运算思想来考虑,将临时结果的每位与阶乘元素相乘
for( j=1, carry=0 j<=digitj++ )
{
temp = a[j-1] * i + carry//相应阶乘中的一项与当前所得临时结果的某位相乘(加上进位)
a[j-1] = temp % 10//更新临时结果的位上信息
carry = temp / 10//看是否有进位
}
while(carry)
{//如果有进位
a[++digit-1] = carry % 10//新加一位,添加信息。位数增1
carry = carry / 10//看还能不能进位
}
}
printf("n ! = ") //显示结果
for(j = digitj >=1j--)
{
printf("%d",a[j-1])
}
printf("\n")
return 0
}#include <stdio.h> int main(){int n int a[9000]//确保保存最终运算结果的数组足够大 int digit = 1//位数 int temp //阶乘的任一元素与临时结果的某位的乘积结果 int i, j, carry//carry:进位 printf("please in put n:\n") scanf("%d",&n) a[0] = 1 //将结果先初始化为1 for ( i=2i<=ni++ ) //开始阶乘,阶乘元素从2开始依次"登场"{ //按最基本的乘法运算思想来考虑,将临时结果的每位与阶乘元素相乘 for( j=1, carry=0 j<=digitj++ ){temp = a[j-1] * i + carry//相应阶乘中的一项与当前所得临时结果的某位相乘(加上进位) a[j-1] = temp % 10//更新临时结果的位上信息 carry = temp / 10//看是否有进位 }while(carry){//如果有进位 a[++digit-1] = carry % 10//新加一位,添加信息。位数增1carry = carry / 10//看还能不能进位 }}printf("n ! = ") //显示结果for(j = digitj >=1j--){printf("%d",a[j-1]) }printf("\n") return 0 }
可以调用 matrix2() 或 matrix() 做矩阵相乘。下面主函数 以 调用 matrix() 为例,介绍过程:
输入参数,动态分配数组,输入矩阵数据,调用,输出,释放内存。
#include<iostream>
using namespace std
#include<stdio.h>
/*---------------------
c[j]][i] = a[j][k] * b[k][i] = c[j][i]
a[ny][nk]
b[nk][nx]
c[ny][nx]
*---------------------*/
void matrix2(int *a,int *b, int *c, int nx, int ny, int nk)
{
int i,j,k
for(j=0j<nyj++){
for(i=0i<nxi++){
c[j*nx+i]=0
for(k=0k<nkk++)c[j*nx+i]+= a[j*nk+k] * b[k*nx+i]
}
}
}
/*---------------------
b[j][k] * c[k][i] = a[j][i]
*---------------------*/
void matrix(int **b,int **c, int **a, int nx, int ny, int nk)
{
int i,j,k
for (j=0j<nyj++)for(i=0i<nxi++)a[j][i]= 0
for(j=0j<nyj++){
for(i=0i<nxi++){
for(k=0k<nkk++)a[j][i]+= b[j][k]*c[k][i]
}
}
}
int main()
{
int i,j,k,tmp
int b_row,b_col
int c_row,c_col
int a_row,a_col
int **b,**c,**a
printf("please enter b_row b_col of matrix B\n")
scanf("%d %d",&b_row,&b_col)
c_row = b_col
printf("please enter c_col of matrix C\n")
scanf("%d",&c_col)
a_row = b_row
a_col = c_col
a = (int **) malloc(sizeof(int *) * a_row)
for (j=0j<a_rowj++){
a[j] = (int *) malloc(sizeof(int) * a_col)
}
b = (int **) malloc(sizeof(int *) * b_row)
for (j=0j<b_rowj++){
b[j] = (int *) malloc(sizeof(int) * b_col)
}
c = (int **) malloc(sizeof(int *) * c_row)
for (j=0j<c_rowj++){
c[j] = (int *) malloc(sizeof(int) * c_col)
}
if (!c[c_row-1]) {
printf("no enought memory\n")exit(0)
}
printf("Please input int matrix b[%d][%d]\n",b_row,b_col)
for (j=0j<b_rowj++)
for (i=0i<b_coli++){
scanf("%d",&tmp)
b[j][i] = tmp
}
printf("Please input int matrix c[%d][%d]\n",c_row,c_col)
for (j=0j<c_rowj++)
for (i=0i<c_coli++){
scanf("%d",&tmp)
c[j][i] = tmp
}
matrix( b ,c,a, a_col, a_row, b_col)
for(j=0j<a_rowj++)
{
for (i=0i<a_coli++) printf("%d ",a[j][i])
printf("\n")
}
for (i=0i<a_rowi++) free((void *)a[i])free((void *)a)
for (i=0i<b_rowi++) free((void *)b[i])free((void *)b)
for (i=0i<c_rowi++) free((void *)c[i])free((void *)c)
return 0
}
数据和结果例子:
please enter b_row b_col of matrix B
3 2
please enter c_col of matrix C
3
Please input int matrix b[3][2]
1 2 3 4 5 6
Please input int matrix c[2][3]
1 2 3 4 5 6
9 12 15
19 26 33
29 40 51
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)