1、程序运行输入数据时,第一行为A矩阵的行列数和B矩阵的行列数,接着分别输入A、B两个矩阵的值。
2、首先,定义6个整型变量,保存A、B矩阵的行和列,以及控制循环的变量,k则用于实现矩阵的乘法。
3、接着,定义三个整型二维数组,保存A、B和C矩阵的各元素。
4、输入三个矩阵的行数和列数,保存在变量a、b、c中。
5、输入矩阵A的各元素,保存在数组X中。
6、输入矩阵B的各元素,保存在数组Y中。
7、将二维数组Z的各元素,初始化为0。
8、用两层for循环,控制矩阵的乘法,并输出乘法所得的结果。
9、计算A矩阵和B矩阵的乘法,结果保存在数组Z中。
10、最后,输出乘法所得的结果,即输出Z数组中的所有元素。
11、运行程序,输入矩阵A和B的行数和列数,以及A矩阵和B矩阵的所有元素,电脑就会计算出乘积C矩阵的所有元素,并输出C矩阵。
可以调用 matrix2() 或 matrix() 做矩阵相乘。下面主函数 以 调用 matrix() 为例,介绍过程:
输入参数,动态分配数组,输入矩阵数据,调用,输出,释放内存。
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
using namespace std;
/*---------------------
c[j][i] += a[j][k] * b[k][i]   (summed over k)
a[ny][nk]
b[nk][nx]
c[ny][nx]
*---------------------*/
/*
 * Multiply two matrices stored as flat row-major arrays:
 *   c[j][i] = sum_k a[j][k] * b[k][i]
 * a is ny x nk, b is nk x nx, c is ny x nx (all row-major, element
 * [r][col] at index r*cols + col).  c is fully overwritten.
 */
void matrix2(int *a, int *b, int *c, int nx, int ny, int nk)
{
    int i, j, k;
    for (j = 0; j < ny; j++) {
        for (i = 0; i < nx; i++) {
            c[j*nx + i] = 0;
            for (k = 0; k < nk; k++)
                c[j*nx + i] += a[j*nk + k] * b[k*nx + i];
        }
    }
}
/*---------------------
b[j][k] * c[k][i] = a[j][i]
*---------------------*/
/*
 * Compute a = b * c for matrices stored as arrays of row pointers:
 *   a[j][i] = sum_k b[j][k] * c[k][i]
 * b is ny x nk, c is nk x nx, a is ny x nx.  a is zeroed first, so any
 * previous contents are discarded.
 */
void matrix(int **b, int **c, int **a, int nx, int ny, int nk)
{
    int i, j, k;
    for (j = 0; j < ny; j++)
        for (i = 0; i < nx; i++)
            a[j][i] = 0;
    for (j = 0; j < ny; j++) {
        for (i = 0; i < nx; i++) {
            for (k = 0; k < nk; k++)
                a[j][i] += b[j][k] * c[k][i];
        }
    }
}
/*
 * Read matrix B (b_row x b_col) and matrix C (c_row x c_col) from stdin,
 * compute A = B * C via matrix(), print A row by row, and free all
 * allocations.  c_row is forced to b_col so the product is defined.
 */
int main()
{
    int i, j, tmp;
    int b_row, b_col;   /* dimensions of B */
    int c_row, c_col;   /* dimensions of C; c_row == b_col by construction */
    int a_row, a_col;   /* dimensions of the product A */
    int **b, **c, **a;

    printf("please enter b_row b_col of matrix B\n");
    scanf("%d %d", &b_row, &b_col);
    c_row = b_col;                       /* inner dimensions must agree */
    printf("please enter c_col of matrix C\n");
    scanf("%d", &c_col);
    a_row = b_row;
    a_col = c_col;

    /* Allocate each matrix as an array of row pointers. */
    a = (int **) malloc(sizeof(int *) * a_row);
    for (j = 0; j < a_row; j++) {
        a[j] = (int *) malloc(sizeof(int) * a_col);
    }
    b = (int **) malloc(sizeof(int *) * b_row);
    for (j = 0; j < b_row; j++) {
        b[j] = (int *) malloc(sizeof(int) * b_col);
    }
    c = (int **) malloc(sizeof(int *) * c_row);
    for (j = 0; j < c_row; j++) {
        c[j] = (int *) malloc(sizeof(int) * c_col);
    }
    /* Crude allocation check: only the very last malloc is tested. */
    if (!c[c_row - 1]) {
        printf("no enough memory\n");
        exit(0);
    }

    printf("Please input int matrix b[%d][%d]\n", b_row, b_col);
    for (j = 0; j < b_row; j++)
        for (i = 0; i < b_col; i++) {
            scanf("%d", &tmp);
            b[j][i] = tmp;
        }
    printf("Please input int matrix c[%d][%d]\n", c_row, c_col);
    for (j = 0; j < c_row; j++)
        for (i = 0; i < c_col; i++) {
            scanf("%d", &tmp);
            c[j][i] = tmp;
        }

    /* matrix(b, c, a, nx, ny, nk): a[ny][nx] = b[ny][nk] * c[nk][nx] */
    matrix(b, c, a, a_col, a_row, b_col);

    for (j = 0; j < a_row; j++) {
        for (i = 0; i < a_col; i++) printf("%d ", a[j][i]);
        printf("\n");
    }

    /* Free the rows first, then the row-pointer arrays. */
    for (i = 0; i < a_row; i++) free((void *)a[i]);
    free((void *)a);
    for (i = 0; i < b_row; i++) free((void *)b[i]);
    free((void *)b);
    for (i = 0; i < c_row; i++) free((void *)c[i]);
    free((void *)c);
    return 0;
}
数据和结果例子:
please enter b_row b_col of matrix B
3 2
please enter c_col of matrix C
3
Please input int matrix b[3][2]
1 2 3 4 5 6
Please input int matrix c[2][3]
1 2 3 4 5 6
9 12 15
19 26 33
29 40 51
!! a cross b.f
!
! Fixed-Format Fortran Source File
! Generated by PGI Visual Fortran(R)
! 2010-12-12 21:58:04
!
!Parallel matrix multiplication: main program
program cross
! Parallel matrix multiplication: main driver.
! Rank 0 reads the problem size M, N, L; the sizes are broadcast, the
! shared work buffers are partitioned into per-matrix blocks, the
! operands are initialized, and the timed parallel multiply is run and
! checked.
implicit double precision (a-h, o-z)
include 'mpif.h'
! 128 MiB of double precision storage per buffer
parameter (nbuffer=128*1024*1024/8)
dimension buf(nbuffer),buf2(nbuffer)
double precision time_start, time_end
! NOTE(review): the user routine named matmul shadows the F90 intrinsic
external init, check, matmul
call MPI_Init(ierr)
call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr)
! The problem size is read once on rank 0 and broadcast to all ranks.
if (myrank.eq.0) then
print *, 'Enter M, N, L: '
call flush(6)
read(*,*) M, N, L
endif
call MPI_Bcast(M, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
call MPI_Bcast(N, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
call MPI_Bcast(L, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
! Rows (M) and columns (L) are block-distributed, so both must divide
! evenly among the processes.
if ( mod(m,nprocs).ne.0 .or. mod(l,nprocs).ne.0 ) then
if (myrank.eq.0) print *, 'M or L cannot be divided by nprocs!'
call MPI_Finalize(ierr)
stop
endif
! Partition each buffer into the local blocks.  The offsets are the
! products of the block dimensions (the original inline comments show
! the lost second factor of each product):
!   a block: (m/nprocs) x n,  b block: n x (l/nprocs),
!   c block: (m/nprocs) x l,  work area after that.
ia = 1
ib = ia + m/nprocs * n
ic = ib + n * l/nprocs
iwk = ic + m/nprocs * l
iend = iwk + n * l/nprocs
if ( iend .gt. nbuffer+1 ) then
if (myrank.eq.0) print *, 'Insufficient buffer size!'
call MPI_Finalize(ierr)
stop
endif
! init fills a and b and writes transposed copies into buf2.
call init( m, n, l, myrank, nprocs, buf(ia), buf(ib), buf(ic), &
buf2(ia), buf2(ib), buf2(ic) )
time_start = MPI_Wtime()
call matmul( m, n, l, myrank, nprocs, buf2(ia), buf2(ib), buf2(ic), &
buf2(iwk) )
time_end = MPI_Wtime()
call check( m, n, l, myrank, nprocs, buf2(ia), buf2(ib), buf2(ic))
if ( myrank .eq. 0 ) then
print *, 'time = ', time_end-time_start
! 2n-1 flops per output element, m*l elements in total
print *, 'mflops = ', m*(n+n-1.0)*l/(time_end-time_start)*1d-6
endif
print*,'ok'
call MPI_Finalize(ierr)
stop
end
!------------------------------------------------------------------
subroutine init(m, n, l, myrank, nprocs, a, b, c, a2, b2,c2)
! Fill the local blocks of a and b with test data and store their
! transposes in a2 and b2.  Every entry of local row i of a holds that
! row's global index; every entry of local column j of b holds that
! column's global index.  c and c2 are carried only for the caller.
implicit double precision (a-h, o-z)
include 'mpif.h'
dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)
dimension a2(n, m/nprocs), b2(l/nprocs, n), c2(l,m/nprocs)
mrows = m/nprocs
lcols = l/nprocs
! a(i,j) = global row index of local row i on this rank
do jcol = 1, n
do irow = 1, mrows
a(irow, jcol) = irow + myrank*mrows
enddo
enddo
! b(i,j) = global column index of local column j on this rank
do jcol = 1, lcols
do irow = 1, n
b(irow, jcol) = jcol + myrank*lcols
enddo
enddo
! a2 = transpose(a)
do jcol = 1, mrows
do irow = 1, n
a2(irow, jcol) = a(jcol, irow)
enddo
enddo
! b2 = transpose(b)
do jcol = 1, n
do irow = 1, lcols
b2(irow, jcol) = b(jcol, irow)
enddo
enddo
return
end
!------------------------------------------------------------------
subroutine check(m, n, l, myrank, nprocs, a, b, c)
! Verify the result of the parallel multiply.  With a(i,j) = global row
! index and b(i,j) = global column index (see init), every product
! element equals n * (global column) * (global row), so the local block
! of c is compared against that closed form.
implicit double precision (a-h, o-z)
include 'mpif.h'
dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)
! NOTE(review): the loops below index c as if it were c(l, m/nprocs)
! (same total size as the declaration above; the commented declaration
! matches the access pattern and matmul's transposed layout) — confirm.
!dimension a(n,m/nprocs), b(l/nprocs,n), c(l,m/nprocs)
integer local_code, code
mloc = m/nprocs
lloc = l/nprocs
! local_code is set to 1 at the first mismatching element.
! NOTE(review): the offset uses lloc although j ranges over mloc —
! verify the expected value for m /= l.
local_code = 0
do i=1, l
do j=1, mloc
if ( abs(c(i,j) - n*dble(j+myrank*lloc)*i) .gt. 1d-10 ) then
local_code = 1
print*,'local_code=',local_code
goto 10
endif
enddo
enddo
! Sum the per-rank failure flags onto rank 0; code = 0 means success.
10 call MPI_Reduce( local_code, code, 1, MPI_INTEGER, MPI_SUM, 0, MPI_COMM_WORLD, ierr)
if ( myrank .eq. 0 ) then
print *, 'code = ', code
endif
return
end
*!Parallel multiplication of matrices using MPI_Isend/MPI_Irecv
*
subroutine matmul(m, n, l, myrank, nprocs, a, b, c, work)
* Ring-pipelined parallel matrix multiply using MPI_Isend/MPI_Irecv.
* Each rank holds a transposed column block of a (n x m/nprocs), a
* transposed row block of b (l/nprocs x n), and accumulates its block
* of c while the a-blocks circulate around the process ring; the
* exchange of the next a-block overlaps the multiply of the current one.
* work is a receive buffer of the same size as the local a-block.
implicit double precision (a-h, o-z)
include 'mpif.h'
dimension a(n,m/nprocs), b(l/nprocs,n), c(l/nprocs,m),
& work(n,m/nprocs)
integer src, dest, tag
integer status(MPI_STATUS_SIZE, 2), request(2)
*
mloc = m/nprocs
lloc = l/nprocs
*
* Send the held a-block to the left neighbour, receive from the right.
dest = mod( myrank-1+nprocs, nprocs )
src = mod( myrank+1,nprocs )
*
* jpos = global column offset of the a-block currently held locally.
jpos=myrank*mloc
print*,'myrank=',myrank
c print*,'dest=',dest,'src=',src
c print*,'jpos=',jpos,'tag=',tag
*
* nprocs-1 pipeline steps: post the exchange, multiply with the block
* in hand, then wait for completion and adopt the received block.
do ip=1, nprocs - 1
tag = 10000 + ip
*
call MPI_Isend( a, n*mloc, MPI_DOUBLE_PRECISION, dest, tag,
& MPI_COMM_WORLD, request(1), ierr )
call MPI_Irecv( work, n*mloc, MPI_DOUBLE_PRECISION, src, tag,
& MPI_COMM_WORLD, request(2), ierr )
*
* c(i, jpos+j) = sum_k b(i,k) * a(k,j) for the block in hand.
do i=1, lloc
do j=1, mloc
sum=0.d0
do k=1, n
sum = sum + b(i,k) * a(k,j)
enddo
c(i, j+jpos) = sum
enddo
enddo
*
call MPI_Waitall(2, request, status, ierr)
*
* Copy work -> a.  (The copy could be avoided by alternating a/work
* between the compute and communication phases.)
do i=1, n
do j=1, mloc
a(i,j) = work(i,j)
enddo
enddo
*
* Advance to the column offset of the block just received.
jpos = jpos + mloc
if ( jpos .ge. m ) jpos = 0
*
enddo
*
* Final multiply with the last block received (no exchange needed).
do i=1, lloc
do j=1, mloc
sum=0.d0
do k=1, n
sum = sum + b(i,k) * a(k,j)
enddo
c(i, j+jpos) = sum
enddo
enddo
*
* Debug output: print a few entries of the local result block.
print*,'c(1,mloc)=',c(1,mloc)
print*,'c(1,2)=', c(1,2)
print*,'c(2,1)=', c(2,1)
print*,'c(lloc,1)=',c(lloc,1)
return
end
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)