Hand-Written Deep Learning: Implementing a Convolution Layer with NumPy


Contents

  • 1. PyTorch Version
  • 2. NumPy Version
  • 3. Verifying the Result
  • Summary


1. PyTorch Version

  1. Suppose we have the following setup:
  • Input: batch_size = 1, input_channel = 3, input_h = 10, input_w = 10
  • Conv layer: input_channel = 3, output_channel = 5, kernel_size = 3x3, stride = 2, dilation = 2
  • The code is as follows:
import torch
import torch.nn as nn

x_torch = torch.randn(1, 3, 10, 10)
# m.weight.shape is (5, 3, 3, 3): output_channel, input_channel, kernel_h, kernel_w
m = nn.Conv2d(3, 5, 3, stride=2, dilation=2)
y_torch = m(x_torch)
print('torch output shape: ', y_torch.shape)
print('torch output: ', y_torch)
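
With these parameters (padding defaults to 0), PyTorch's output-size formula gives 3 for both height and width, so y_torch should have shape (1, 5, 3, 3). A quick check of the arithmetic:

# output size = (H + 2*padding - dilation*(kernel_size - 1) - 1) // stride + 1
out = (10 + 2 * 0 - 2 * (3 - 1) - 1) // 2 + 1
print(out)  # 3  -> output shape is (1, 5, 3, 3)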

2. NumPy Version

import numpy as np


class Conv2d:
    def __init__(self, input_channel, output_channel, kernel_size, stride=1, padding=0, bias=True, dilation=1):
        self.input_channel = input_channel
        self.output_channel = output_channel
        self.stride = stride
        self.padding = padding
        self.dilation = (dilation, dilation) if isinstance(dilation, int) else dilation
        self.kernel_size = (kernel_size, kernel_size) if isinstance(kernel_size, int) else kernel_size
        self.weight = np.random.randn(output_channel, input_channel, self.kernel_size[0], self.kernel_size[1])
        self.bias = None
        if bias:
            self.bias = np.random.randn(output_channel)

    def __call__(self, inputs):
        return self.infer(inputs)

    def infer(self, inputs):
        # Compute the output shape from the layer parameters
        batch_size, input_channel, height, width = inputs.shape
        output_h = (height + 2 * self.padding - self.dilation[0] * (self.kernel_size[0] - 1) - 1) // self.stride + 1
        output_w = (width + 2 * self.padding - self.dilation[1] * (self.kernel_size[1] - 1) - 1) // self.stride + 1
        outputs = np.zeros([batch_size, self.output_channel, output_h, output_w])

        # Build the zero-padded input array
        inputs_padding = np.zeros([batch_size, input_channel, height + 2 * self.padding, width + 2 * self.padding])
        inputs_padding[:, :, self.padding: self.padding + height, self.padding:self.padding + width] = inputs

        # Build the effective (zero-inserted) kernel for dilation; the original self.weight is left untouched.
        # Note: this copy must also run when dilation == 1, otherwise the kernel would stay all zeros.
        dilation_shape = self.dilation[0] * (self.kernel_size[0] - 1) + 1, self.dilation[1] * (self.kernel_size[1] - 1) + 1
        kernel = np.zeros((self.output_channel, input_channel, dilation_shape[0], dilation_shape[1]))
        for i in range(self.kernel_size[0]):
            for j in range(self.kernel_size[1]):
                kernel[:, :, self.dilation[0] * i, self.dilation[1] * j] = self.weight[:, :, i, j]

        # Forward pass: slide the dilated kernel over the padded input
        for h in range(output_h):
            for w in range(output_w):
                input_ = inputs_padding[
                         :,
                         :,
                         h * self.stride:h * self.stride + dilation_shape[0],
                         w * self.stride:w * self.stride + dilation_shape[1]
                         ]
                # input_ shape : batch_size, output_channel, input_channel, dilation_shape[0], dilation_shape[1]
                input_ = np.repeat(input_[:, np.newaxis, :, :, :], self.output_channel, axis=1)
                # kernel_ shape: batch_size, output_channel, input_channel, dilation_shape[0], dilation_shape[1]
                kernel_ = np.repeat(kernel[np.newaxis, :, :, :, :], batch_size, axis=0)
                # multiply element-wise, then sum over (input_channel, kh, kw)
                # -> output shape: batch_size, output_channel
                output = input_ * kernel_
                output = np.sum(output, axis=(-1, -2, -3))
                outputs[:, :, h, w] = output
        if self.bias is not None:
            bias_ = np.tile(self.bias.reshape(-1, 1), (1, output_h * output_w)).\
                reshape(self.output_channel, output_h, output_w)
            outputs += bias_
        return outputs
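
To make the zero-insertion step above more concrete, here is a small standalone illustration (the 2x2 kernel values are made up for demonstration) of how a kernel is expanded when dilation = 2:

import numpy as np

# Toy example: expand a 2x2 kernel to its dilated (zero-inserted) 3x3 form with dilation = 2.
w = np.array([[1., 2.],
              [3., 4.]])
d = 2
k = np.zeros((d * (w.shape[0] - 1) + 1, d * (w.shape[1] - 1) + 1))
for i in range(w.shape[0]):
    for j in range(w.shape[1]):
        k[d * i, d * j] = w[i, j]
print(k)
# [[1. 0. 2.]
#  [0. 0. 0.]
#  [3. 0. 4.]]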


3. Verifying the Result

To verify correctness, the parameters of the PyTorch Conv2d are copied into the NumPy Conv2d and the outputs are compared. The complete code is as follows:

import torch
import torch.nn as nn
import numpy as np


class Conv2d:
    def __init__(self, input_channel, output_channel, kernel_size, stride=1, padding=0, bias=True, dilation=1):
        self.input_channel = input_channel
        self.output_channel = output_channel
        self.stride = stride
        self.padding = padding
        self.dilation = (dilation, dilation) if isinstance(dilation, int) else dilation
        self.kernel_size = (kernel_size, kernel_size) if isinstance(kernel_size, int) else kernel_size
        self.weight = np.random.randn(output_channel, input_channel, self.kernel_size[0], self.kernel_size[1])
        self.bias = None
        if bias:
            self.bias = np.random.randn(output_channel)

    def __call__(self, inputs):
        return self.infer(inputs)

    def infer(self, inputs):
        # Compute the output shape from the layer parameters
        batch_size, input_channel, height, width = inputs.shape
        output_h = (height + 2 * self.padding - self.dilation[0] * (self.kernel_size[0] - 1) - 1) // self.stride + 1
        output_w = (width + 2 * self.padding - self.dilation[1] * (self.kernel_size[1] - 1) - 1) // self.stride + 1
        outputs = np.zeros([batch_size, self.output_channel, output_h, output_w])

        # Build the zero-padded input array
        inputs_padding = np.zeros([batch_size, input_channel, height + 2 * self.padding, width + 2 * self.padding])
        inputs_padding[:, :, self.padding: self.padding + height, self.padding:self.padding + width] = inputs

        # Build the effective (zero-inserted) kernel for dilation; the original self.weight is left untouched.
        # Note: this copy must also run when dilation == 1, otherwise the kernel would stay all zeros.
        dilation_shape = self.dilation[0] * (self.kernel_size[0] - 1) + 1, self.dilation[1] * (self.kernel_size[1] - 1) + 1
        kernel = np.zeros((self.output_channel, input_channel, dilation_shape[0], dilation_shape[1]))
        for i in range(self.kernel_size[0]):
            for j in range(self.kernel_size[1]):
                kernel[:, :, self.dilation[0] * i, self.dilation[1] * j] = self.weight[:, :, i, j]

        # Forward pass: slide the dilated kernel over the padded input
        for h in range(output_h):
            for w in range(output_w):
                input_ = inputs_padding[
                         :,
                         :,
                         h * self.stride:h * self.stride + dilation_shape[0],
                         w * self.stride:w * self.stride + dilation_shape[1]
                         ]
                # input_ shape : batch_size, output_channel, input_channel, dilation_shape[0], dilation_shape[1]
                input_ = np.repeat(input_[:, np.newaxis, :, :, :], self.output_channel, axis=1)
                # kernel_ shape: batch_size, output_channel, input_channel, dilation_shape[0], dilation_shape[1]
                kernel_ = np.repeat(kernel[np.newaxis, :, :, :, :], batch_size, axis=0)
                # multiply element-wise, then sum over (input_channel, kh, kw)
                # -> output shape: batch_size, output_channel
                output = input_ * kernel_
                output = np.sum(output, axis=(-1, -2, -3))
                outputs[:, :, h, w] = output
        if self.bias is not None:
            bias_ = np.tile(self.bias.reshape(-1, 1), (1, output_h * output_w)).\
                reshape(self.output_channel, output_h, output_w)
            outputs += bias_
        return outputs


if __name__ == '__main__':
    x_torch = torch.randn(1, 3, 10, 10)
    # m.weight.shape is (5, 3, 3, 3): output_channel, input_channel, kernel_h, kernel_w
    m = nn.Conv2d(3, 5, 3, stride=2, dilation=2)
    y_torch = m(x_torch)
    print('torch output shape: ', y_torch.shape)
    print('torch output: ', y_torch)

    # Convert the torch input tensor to a numpy array
    x_np = np.array(x_torch)
    m_np = Conv2d(3, 5, 3, stride=2, dilation=2)

    # For a fair comparison, copy the torch Conv2d parameters into the numpy Conv2d
    m_np.weight = m.weight.detach().numpy()
    m_np.bias = m.bias.detach().numpy()
    y_np = m_np(x_np)
    print('numpy output shape: ', y_np.shape)
    print('numpy output: ', y_np)



The printed results are as follows (screenshot omitted): both outputs have shape (1, 5, 3, 3), and the values match, so the verification succeeds.
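
Rather than comparing the printouts by eye, a numerical check can be appended at the end of the __main__ block above (a minimal sketch using the y_torch and y_np variables already defined there):

    # Appended at the end of the __main__ block: compare the two outputs numerically.
    y_torch_np = y_torch.detach().numpy()
    print('max abs diff: ', np.abs(y_np - y_torch_np).max())
    print('allclose within 1e-5: ', np.allclose(y_np, y_torch_np, atol=1e-5))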


Summary
  • In the forward loop, the two lines below act like a broadcast: they expand input_ and kernel_ to include the batch_size and output_channel dimensions, so there is no need for explicit loops over those two dimensions (see the sketch after this list for a version that uses NumPy broadcasting directly).


                # input_ shape : batch_size, output_channel, input_channel, dilation_shape[0], dilation_shape[1]
                input_ = np.repeat(input_[:, np.newaxis, :, :, :], self.output_channel, axis=1)
                # kernel_ shape: batch_size, output_channel, input_channel, dilation_shape[0], dilation_shape[1]
                kernel_ = np.repeat(kernel[np.newaxis, :, :, :, :], batch_size, axis=0)
  • In PyTorch's Conv2d, a dilated convolution (dilation > 1) does not insert zeros into the kernel itself: with kernel_size = 3x3 and dilation = 2 the kernel stays 3x3 rather than becoming 5x5; PyTorch simply samples the input at spaced-out locations during the computation.


    The NumPy version here instead builds a separate zero-inserted kernel from the original weight (self.weight itself is unchanged); otherwise the weights of the PyTorch Conv2d could not be copied over directly.


  • There is one more parameter, groups, that is not handled yet; it will be added later.


(P.S. There is surely still room to optimize the code.)
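
As one such optimization (a sketch, not part of the original code), the two np.repeat calls can be dropped entirely by letting NumPy broadcasting align the batch and output-channel axes inside the h/w loop of infer():

                # input_ sliced from inputs_padding has shape (batch_size, input_channel, kh, kw);
                # kernel has shape (output_channel, input_channel, kh, kw).
                # Singleton axes let broadcasting expand both automatically.
                output = np.sum(input_[:, np.newaxis] * kernel[np.newaxis], axis=(-1, -2, -3))
                outputs[:, :, h, w] = output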

The end.

