! a cross b.f
!
! Fixed-Format Fortran Source File
! Generated by PGI Visual Fortran(R)
! 2010-12-12 21:58:04
!
!Parallel matrix multiplication: main program
program cross
implicit double precision (a-h, o-z)
include 'mpif.h'
parameter (nbuffer=128*1024*1024/8)
dimension buf(nbuffer),buf2(nbuffer)
double precision time_start, time_end
external init, check, matmul
call MPI_Init(ierr)
call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr)
if (myrank.eq.0) then
print *, 'Enter M, N, L: '
call flush(6)
read(*,*) M, N, L
endif
call MPI_Bcast(M, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
call MPI_Bcast(N, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
call MPI_Bcast(L, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
if ( mod(m,nprocs).ne.0 .or. mod(l,nprocs).ne.0 ) then
if (myrank.eq.0) print *, 'M or L cannot be divided by nprocs!'
call MPI_Finalize(ierr)
stop
endif
ia = 1
ib = ia + m/nprocs ! n
ic = ib + n ! l/nprocs
iwk = ic + m/nprocs ! l
iend = iwk + n ! l/nprocs
if ( iend .gt. nbuffer+1 ) then
if (myrank.eq.0) print *, 'Insufficient buffer size!'
call MPI_Finalize(ierr)
stop
endif
call init( m, n, l, myrank, nprocs, buf(ia), buf(ib), buf(ic)
& , buf2(ia),buf2(ib),buf2(ic) )
time_start = MPI_Wtime()
call matmul( m, n, l, myrank, nprocs, buf2(ia), buf2(ib), buf2(ic)
&, buf2(iwk) )
time_end = MPI_Wtime()
call check( m, n, l, myrank, nprocs, buf2(ia), buf2(ib), buf2(ic))
if ( myrank .eq. 0 ) then
print *, 'time = ', time_end-time_start
print *, 'mflops = ', m*(n+n-1.0)*l/(time_end-time_start)*1d-6
endif
print*,'ok'
call MPI_Finalize(ierr)
stop
end
!------------------------------------------------------------------
subroutine init(m, n, l, myrank, nprocs, a, b, c, a2, b2,c2)
implicit double precision (a-h, o-z)
include 'mpif.h'
dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)
dimension a2(n, m/nprocs), b2(l/nprocs, n), c2(l,m/nprocs)
mloc = m/nprocs
lloc = l/nprocs
! Init. a, b
do j=1, n
do i=1, mloc
a(i,j) = i+myrank*mloc
enddo
enddo
do j=1, lloc
do i=1, n
b(i,j) = j+myrank*lloc
enddo
enddo
! Tranpose a, b ->a2, b2
do j=1, mloc
do i=1,n
a2(i,j) = a(j,i)
enddo
enddo
do j=1, n
do i=1,lloc
b2(i,j) = b(j,i)
enddo
enddo
return
end
!------------------------------------------------------------------
subroutine check(m, n, l, myrank, nprocs, a, b, c)
implicit double precision (a-h, o-z)
include 'mpif.h'
dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)
!dimension a(n,m/nprocs), b(l/nprocs,n), c(l,m/nprocs)
integer local_code, code
mloc = m/nprocs
lloc = l/nprocs
!Check the results
local_code = 0
do i=1, l
do j=1, mloc
if ( abs(c(i,j) - n*dble(j+myrank*lloc)*i) .gt. 1d-10 ) then
local_code = 1
print*,'local_code=',local_code
goto 10
endif
enddo
enddo
10call MPI_Reduce( local_code, code, 1, MPI_INTEGER, MPI_SUM, 0,
&MPI_COMM_WORLD, ierr)
!
if ( myrank .eq. 0 ) then
print *, 'code = ', code
endif
!
return
end
*!Parallel multiplication of matrices using MPI_Isend/MPI_Irecv
*
subroutine matmul(m, n, l, myrank, nprocs, a, b, c, work)
implicit double precision (a-h, o-z)
include 'mpif.h'
dimension a(n,m/nprocs), b(l/nprocs,n), c(l/nprocs,m),
& work(n,m/nprocs)
integer src, dest, tag
integer status(MPI_STATUS_SIZE, 2), request(2)
*
mloc = m/nprocs
lloc = l/nprocs
*
dest = mod( myrank-1+nprocs, nprocs )
src = mod( myrank+1,nprocs )
*
jpos=myrank*mloc
print*,'myrank=',myrank
c print*,'dest=',dest,'src=',src
c print*,'jpos=',jpos,'tag=',tag
*
do ip=1, nprocs - 1
tag = 10000 + ip
*
call MPI_Isend( a, n*mloc, MPI_DOUBLE_PRECISION, dest, tag,
& MPI_COMM_WORLD, request(1), ierr )
call MPI_Irecv( work, n*mloc, MPI_DOUBLE_PRECISION, src, tag,
& MPI_COMM_WORLD, request(2), ierr )
*
do i=1, lloc
do j=1, mloc
sum=0.d0
do k=1, n
sum = sum + b(i,k) * a(k,j)
enddo
c(i, j+jpos) = sum
enddo
enddo
*
call MPI_Waitall(2, request, status, ierr)
*
* 拷贝 work ->b (可以通过在计算/通信中交替使用 b/work 来避该免 *** 作)
do i=1, n
do j=1, mloc
a(i,j) = work(i,j)
enddo
enddo
*
jpos = jpos + mloc
if ( jpos .ge. m ) jpos = 0
*
enddo
*
do i=1, lloc
do j=1, mloc
sum=0.d0
do k=1, n
sum = sum + b(i,k) * a(k,j)
enddo
c(i, j+jpos) = sum
enddo
enddo
*
print*,'c(1,mloc)=',c(1,mloc)
print*,'c(1,2)=', c(1,2)
print*,'c(2,1)=', c(2,1)
print*,'c(lloc,1)=',c(lloc,1)
return
end
希望以下程序会对你有帮助#include <stdio.h>
#include <stdlib.h>基答
#include <unistd.h>
#include <string.h>
#include "mpi.h"
#define VERSION "0.1"
void usage(char * argv)
{
printf("Usage: %s [-h] [-V] \n\t\t[-m matrix_input_file] [-v vector_input_file]\n", argv)
exit(0)
}
void show_version()
{
printf("matrix_plus_vector v%s icymoon@NKU\n", VERSION)
exit(0)
}
void my_exit(char * msg, int quit)
{
printf("%s\n", msg)
exit(quit)
}
void record(int * result, int row_num)
{
int i
printf("==================================================\nThe result is:\n")
for(i = 0i <row_numi ++)
printf("%d\n", result[i])
}
int main(int argc, char * argv[])
{
int opt,rank, size, row, col, i,j, k
char * mfile = NULL
char * vfile = NULL
FILE * matrix_file
FILE * vector_file
int * rows//temp buffer for the row of matrix.
int sum
int flag = 0
MPI_Status status
MPI_Request request
/简仔/get options from command line
while((opt = getopt(argc, argv, "hVm:v:"搏咐慧)) != EOF)
{
switch(opt)
{
case 'h':
usage(argv[0])
break
case 'V':
show_version()
break
case 'm':
mfile = optarg
break
case 'v':
vfile = optarg
break
default:
usage(argv[0])
break
}
}
//check input files mfile &vfile
if(mfile == NULL || vfile == NULL)
my_exit("Invalid input file(s)!", 100)
if((matrix_file = fopen(mfile, "r")) == NULL)
my_exit("Can't open matrix file!", 101)
if((vector_file = fopen(vfile, "r")) == NULL)
my_exit("Can't open vector file!", 101)
//MPI initialization
MPI_Init(&argc, &argv)
MPI_Comm_rank(MPI_COMM_WORLD, &rank)//rank -- process ID
MPI_Comm_size(MPI_COMM_WORLD, &size)//size -- total of processes
if(rank == 0)
{
char buf[1024]
char * tmp_col
fclose(vector_file)
//Broadcast, inputdata error
if(fgets(buf, 1024, matrix_file)==NULL)
{
flag = -1
printf("Invalid matrix input file!\n")
fclose(matrix_file)
exit(0)
}
else
{
tmp_col = strstr(buf, " ")
*tmp_col = '\0'
row = atoi(buf)
col = atoi(tmp_col + 1)
printf("%d rows %d colums\n", row, col)
if(row <= 0 || col <= 0)
{
flag = -1
printf("Invalid matrix input file!\n")
fclose(matrix_file)
exit(0)
}
else
flag = 1
// MPI_Barrier(MPI_COMM_WORLD)
}
}
MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD)
if(flag <0)
{
MPI_Finalize()
my_exit("All the processes were terminated!", 254)
}
MPI_Bcast(&col, 1, MPI_INT, 0, MPI_COMM_WORLD)
MPI_Bcast(&row, 1, MPI_INT, 0, MPI_COMM_WORLD)
MPI_Barrier(MPI_COMM_WORLD)
if(rank == 0)
{
char buf[1024]
int * result = (int*)malloc(sizeof(int)*row)
rows = (int*)malloc(sizeof(int)*col)
if(rows == NULL)
{
MPI_Finalize()
my_exit("malloc error!", 254)
}
if(row <= (size-1))
{
int slave_process = 1
while(fgets(buf, 1024, matrix_file) != NULL)
{
char * get_row
char * head = buf
//initiate every row of the matrix
for(j = 0j <col - 1j ++)
{
get_row = strstr(head, " ")
*get_row = '\0'
rows[j] = atoi(head)
printf("%d ", rows[j])
head = get_row + 1
}
rows[j] = atoi(head)
printf("%d\n", rows[j])
//send the data to slave processes.
MPI_Isend(rows, col, MPI_INT, slave_process, 0, MPI_COMM_WORLD, &request)
slave_process++
}
//gather the result
for(j = 0j <rowj ++)
MPI_Recv(&result[j], 1, MPI_INT, j+1, 0, MPI_COMM_WORLD, &status)
record(result, row)
}
else
{
int steps = row/(size-1)
for(i = 1i <= stepsi ++)
{
for(k = 1k <= (size-1)k ++)
{
if(fgets(buf, 1024, matrix_file) != NULL)
{
char * get_row
char * head = buf
//initiate every row of the matrix
for(j = 0j <col - 1j ++)
{
get_row = strstr(head, " ")
*get_row = '\0'
rows[j] = atoi(head)
printf("%d ", rows[j])
head = get_row + 1
}
rows[j] = atoi(head)
printf("%d\n", rows[j])
//send the data to slave processes.
MPI_Isend(rows, col, MPI_INT, k, 0, MPI_COMM_WORLD, &request)
}
}
for(j = 0j <(size-1)j ++)
{
MPI_Recv(&result[j+(i-1)*(size-1)], 1, MPI_INT, j+1, 0, MPI_COMM_WORLD, &status)
}
}
for(i = 1i <= row-steps*(size-1)i ++)
{
if(fgets(buf, 1024, matrix_file) != NULL)
{
char * get_row
char * head = buf
//initiate every row of the matrix
for(j = 0j <col - 1j ++)
{
get_row = strstr(head, " ")
*get_row = '\0'
rows[j] = atoi(head)
printf("%d ", rows[j])
head = get_row + 1
}
rows[j] = atoi(head)
printf("%d\n", rows[j])
//send the data to slave processes.
MPI_Isend(rows, col, MPI_INT, i, 0, MPI_COMM_WORLD, &request)
}
}
for(j = 0j <row-steps*(size-1)j ++)
{
MPI_Recv(&result[j+steps*(size-1)], 1, MPI_INT, j+1, 0, MPI_COMM_WORLD, &status)
}
record(result, row)
}
free(rows)
}
else
{//slave processes
char buf[32]
sum = 0
int * line//buffer to save the vector
//initiate the vector
line= (int *)malloc(sizeof(int) * col)
for(i = 0i <coli ++)
{
if(fgets(buf, 32, vector_file) != NULL)
line[i] = atoi(buf)
}
rows = (int *)malloc(sizeof(int) * col)
if(row <= (size - 1))
{
MPI_Recv(rows, col, MPI_INT, 0, 0, MPI_COMM_WORLD, &status)
for(i = 0i <coli ++)
sum += rows[i] * line[i]
MPI_Send(&sum, 1, MPI_INT, 0, 0, MPI_COMM_WORLD)
}
else
{
int steps = row/(size - 1)
for(j = 1j <= stepsj ++)
{
sum = 0
MPI_Recv(rows, col, MPI_INT, 0, 0, MPI_COMM_WORLD, &status)
for(i = 0i <coli ++)
sum += rows[i] * line[i]
MPI_Send(&sum, 1, MPI_INT, 0, 0, MPI_COMM_WORLD)
}
if(rank <=(row - steps*(size-1)))
{
sum = 0
MPI_Recv(rows, col, MPI_INT, 0, 0, MPI_COMM_WORLD, &status)
for(i = 0i <coli ++)
sum += rows[i] * line[i]
MPI_Send(&sum, 1, MPI_INT, 0, 0, MPI_COMM_WORLD)
}
}
free(line)
}
MPI_Finalize()
return 1
}
晕倒 老大 2行3列 和2行2列怎么乘啊。。。。。A=[1,2,35,8,9]
A =
1 2 3
5 8 9
B=[1,32,8]
.......
B =
1 3
2 8
B*A
ans =
162630
426878这样才行
MATLAB中 还有一种是点乘不过要全是同样的方阵
搞清楚了是这个吧
函数 kron
格式谈罩塌 C=kron (A,B) %A为m×n矩阵,B为p×q矩阵,则C为mp×nq矩阵含圆。
说明 A与B的张量积定义为: A B与B A均为mp×nq矩阵,但一般闷谈地A B B A。
例1-28 求A B。
>>A=[1 23 4]B=[1 2 34 5 67 8 9]
>>C=kron(A,B)
C =
1 2 3 2 4 6
4 5 6 8 10 12
7 8 9 14 16 18
3 6 9 4 8 12
12 15 18 16 20 24
21 24 27 28 32 36
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)