pybind11—python numpy与C++数据传递

前言

一些处理矩阵运算,图像处理算法,直接采用python实现可能速度稍微慢,效率不高,或者为了直接在 python中调用其他C++第三方库 。 图像,矩阵在python中通常表示为 numpy.ndarray ,因此如何在C++中解析numpy对象,numpy的数据如何传递到C++非常关键,解决了这些问题,就可以丝滑的在python numpy和C++中切换,互相调用。

开发、测试环境

  • windows 10, 64bit
  • Anaconda3, with python 3.7
  • Visual Studio 2017
  • pycharm
  • pybind11
  • C++ Numpy

    1d矩阵相加 py::array_t<double> add_arrays_1d(py::array_t<double>& input1, py::array_t<double>& input2) { // 获取input1, input2的信息 py::buffer_info buf1 = input1.request(); py::buffer_info buf2 = input2.request(); if (buf1.ndim !=1 || buf2.ndim !=1) throw std::runtime_error("Number of dimensions must be one"); if (buf1.size !=buf2.size) throw std::runtime_error("Input shape must match"); //申请空间 auto result = py::array_t<double>(buf1.size); py::buffer_info buf3 = result.request(); //获取numpy.ndarray 数据指针 double* ptr1 = (double*)buf1.ptr; double* ptr2 = (double*)buf2.ptr; double* ptr3 = (double*)buf3.ptr; //指针访问numpy.ndarray for (int i = 0; i < buf1.shape[0]; i++) ptr3[i] = ptr1[i] + ptr2[i]; return result; 2d矩阵相加 py::array_t<double> add_arrays_2d(py::array_t<double>& input1, py::array_t<double>& input2) { py::buffer_info buf1 = input1.request(); py::buffer_info buf2 = input2.request(); if (buf1.ndim != 2 || buf2.ndim != 2) throw std::runtime_error("numpy.ndarray dims must be 2!"); if ((buf1.shape[0] != buf2.shape[0])|| (buf1.shape[1] != buf2.shape[1])) throw std::runtime_error("two array shape must be match!"); //申请内存 auto result = py::array_t<double>(buf1.size); //转换为2d矩阵 result.resize({buf1.shape[0],buf1.shape[1]}); py::buffer_info buf_result = result.request(); //指针访问读写 numpy.ndarray double* ptr1 = (double*)buf1.ptr; double* ptr2 = (double*)buf2.ptr; double* ptr_result = (double*)buf_result.ptr; for (int i = 0; i < buf1.shape[0]; i++) for (int j = 0; j < buf1.shape[1]; j++) auto value1 = ptr1[i*buf1.shape[1] + j]; auto value2 = ptr2[i*buf2.shape[1] + j]; ptr_result[i*buf_result.shape[1] + j] = value1 + value2; return result; //py::array_t<double> add_arrays_3d(py::array_t<double>& input1, py::array_t<double>& input2) { // py::buffer_info buf1 = input1.request(); // py::buffer_info buf2 = input2.request(); // if (buf1.ndim != 3 || buf2.ndim != 3) // throw std::runtime_error("numpy array dim must is 3!"); // for (int i = 0; i < buf1.ndim; i++) // { // if (buf1.shape[i]!=buf2.shape[i]) // { // throw std::runtime_error("inputs shape must match!"); // } // } // // 输出 // auto result = py::array_t<double>(buf1.size); // result.resize({ buf1.shape[0], buf1.shape[1], buf1.shape[2] }); // py::buffer_info buf_result = result.request(); // // 指针读写numpy数据 // double* ptr1 = (double*)buf1.ptr; // double* ptr2 = (double*)buf2.ptr; // double* ptr_result = (double*)buf_result.ptr; // for (int i = 0; i < buf1.size; i++) // { // std::cout << ptr1[i] << std::endl; // } // /*for (int i = 0; i < buf1.shape[0]; i++) // { // for (int j = 0; j < buf1.shape[1]; j++) // { // for (int k = 0; k < buf1.shape[2]; k++) // { // double value1 = ptr1[i*buf1.shape[1] * buf1.shape[2] + k]; // double value2 = ptr2[i*buf2.shape[1] * buf2.shape[2] + k]; // double value1 = ptr1[i*buf1.shape[1] * buf1.shape[2] + k]; // double value2 = ptr2[i*buf2.shape[1] * buf2.shape[2] + k]; // ptr_result[i*buf1.shape[1] * buf1.shape[2] + k] = value1 + value2; // std::cout << value1 << " "; // } // std::cout << std::endl; // } // }*/ // return result; numpy.ndarray 相加, 3d矩阵 @return 3d numpy.ndarray py::array_t<double> add_arrays_3d(py::array_t<double>& input1, py::array_t<double>& input2) { //unchecked<N> --------------can be non-writeable //mutable_unchecked<N>-------can be writeable auto r1 = input1.unchecked<3>(); auto r2 = input2.unchecked<3>(); py::array_t<double> out = py::array_t<double>(input1.size()); out.resize({ input1.shape()[0], input1.shape()[1], input1.shape()[2] }); auto r3 = out.mutable_unchecked<3>(); for (int i = 0; i < input1.shape()[0]; i++) for (int j = 0; j < input1.shape()[1]; j++) for (int k = 0; k < input1.shape()[2]; k++) double value1 = r1(i, j, k); double value2 = r2(i, j, k); //下标索引访问 numpy.ndarray r3(i, j, k) = value1 + value2; return out; PYBIND11_MODULE(numpy_demo2, m) { m.doc() = "Simple demo using numpy!"; m.def("add_arrays_1d", &add_arrays_1d); m.def("add_arrays_2d", &add_arrays_2d); m.def("add_arrays_3d", &add_arrays_3d);

    python测试代码

    import demo9.numpy_demo2 as numpy_demo2
    import numpy as np
    var1 = numpy_demo2.add_arrays_1d(np.array([1, 3, 5, 7, 9]),
                                     np.array([2, 4, 6, 8, 10]))
    print('-'*50)
    print('var1', var1)
    var2 = numpy_demo2.add_arrays_2d(np.array(range(0,16)).reshape([4, 4]),
                                     np.array(range(20,36)).reshape([4, 4]))
    print('-'*50)
    print('var2', var2)
    input1 = np.array(range(0, 48)).reshape([4, 4, 3])
    input2 = np.array(range(50, 50+48)).reshape([4, 4, 3])
    var3 = numpy_demo2.add_arrays_3d(input1,
                                     input2)
    print('-'*50)
    print('var3', var3)
    

    demo2—— 图像RGB转 GRAY

    RGB图像表示为MxNx3的矩阵, GRAY灰度图像表示为MxN的矩阵。
    RGB图像-------numpy.ndarray(data, dtype=np.uint8, shape=[M, N, 3])

    GRAY图像-------numpy.ndarray(data, dtype=np.uint8, shape=[M, N, 1])

    C++代码

    图像RGB 转 GRAY py::array_t<double> rgb_to_gray(py::array_t<unsigned char>& img_rgb) { if (img_rgb.ndim()!=3) throw std::runtime_error("RGB image must has 3 channels!"); py::array_t<unsigned char> img_gray = py::array_t<unsigned char>(img_rgb.shape()[0] * img_rgb.shape()[1]); img_gray.resize({ img_rgb.shape()[0], img_rgb.shape()[1] }); auto rgb = img_rgb.unchecked<3>(); auto gray = img_gray.mutable_unchecked<2>(); for (int i = 0; i < img_rgb.shape()[0]; i++) for (int j = 0; j < img_rgb.shape()[1]; j++) auto R = rgb(i, j, 0); auto G = rgb(i, j, 1); auto B = rgb(i, j, 2); auto GRAY = (R * 30 + G * 59 + B * 11 + 50) / 100; gray(i, j) = static_cast<unsigned char>(GRAY); return img_gray;

    python测试代码

    img_rgb = cv2.imread('F:\\lena\\lena_rgb.jpg', cv2.IMREAD_UNCHANGED)
    img_gray = numpy_demo2.rgb_to_gray(img_rgb)
    plt.imshow(img_gray, cmap=plt.gray())
    plt.show()
    namespace py = pybind11;
    py::array_t<std::complex<float>> my_fft1d_complex(py::array_t<std::complex<float>> input) {
        if (input.ndim() != 1)
            throw std::runtime_error("input dim must be 1");
        vector<complex<float>> in, out;
        auto r1 = input.unchecked<1>();
        for (int i = 0; i < input.size(); i++)
            in.push_back(r1(i));
        fft1d(in, out, in.size());
        py::array_t<std::complex<float>> result(out.size());
        auto r2 = result.mutable_unchecked<1>();
        for (int i = 0; i < out.size(); i++)
            r2(i) = out[i];
        return result;
    py::array_t< std::complex<float>> my_fft2d_img(py::array_t<unsigned char>& image) {
        if (image.ndim() != 2)
            throw std::runtime_error("image dim must is 2!");
        std::vector< std::complex<float>> in;
        std::vector< std::complex<float>> out;
        auto r1 = image.unchecked<2>();
        for (int i = 0; i < r1.shape(0); i++)
            for (int j = 0; j < r1.shape(1); j++)
                in.push_back({ static_cast<float>(r1(i,j)), 0 });
        fft2d(in, out, image.shape()[0], image.shape()[1]);
        py::array_t< std::complex<float>> result = py::array_t< std::complex<float>>(image.size());
        result.resize({ image.shape()[0], image.shape()[1] });
        auto r2 = result.mutable_unchecked<2>();
        int count = 0;
        for (int i = 0; i < r1.shape(0); i++)
            for (int j = 0; j < r1.shape(1); j++)
                r2(i, j) = out[count];
                count++;
        return result;
    PYBIND11_MODULE(fft_demo, m) {
        m.doc() = "Simple fft demo using FFTW";
        m.def("my_fft1d_complex", &my_fft1d_complex, py::return_value_policy::reference);
        m.def("my_fft2d_img", &my_fft2d_img, py::return_value_policy::reference);
    

    fft_tools.h

    #ifndef FFT_TOOLS_H_
    #define FFT_TOOLS_H_
    #if _WIN64
    #include<vector>
    #include<complex>
    #include<fftw3.h>
    using namespace std;
    void fft1d(const vector<complex<float>>& input, vector<complex<float>>& output, int n);
    void fft1d(double * input, double * output, int n);
    void fft1d(float * input, float * output, int n);
    void fft2d(double * input, double * output, int rows, int cols);
    void fft2d(float * input, float * output, int rows, int cols);
    void fft2d(const vector<complex<float>>& input, vector<complex<float>>& output, int rows, int cols);
     计算1维FFT
     http://www.fftw.org/fftw3_doc/Complex-One_002dDimensional-DFTs.html#Complex-One_002dDimensional-DFTs
    void fft1d(const vector<complex<float>>& input, vector<complex<float>>& output, int n) {
        fftwf_complex *in, *out;
        fftwf_plan p;
        in = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * input.size());
        out = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * n);
        for (int i = 0; i < input.size(); i++)
            in[i][0] = input.at(i).real();
            in[i][1] = input.at(i).imag();
        p = fftwf_plan_dft_1d(n, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
        fftwf_execute(p); /* repeat as needed */
        for (int i = 0; i < n; i++)
            output.push_back({ out[i][0], out[i][1] });
        fftwf_destroy_plan(p);
        fftwf_free(in);
        fftwf_free(out);
    void fft1d(const vector<complex<double>>& input, vector<complex<double>>& output, int n) {
        fftw_complex *in, *out;
        fftw_plan p;
        in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * input.size());
        out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * n);
        for (int i = 0; i < input.size(); i++)
            in[i][0] = input.at(i).real();
            in[i][1] = input.at(i).imag();
        p = fftw_plan_dft_1d(n, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
        fftw_execute(p); /* repeat as needed */
        for (int i = 0; i < n; i++)
            output.push_back({ out[i][0], out[i][1] });
        fftw_destroy_plan(p);
        fftw_free(in);
        fftw_free(out);
    2维 FFT
    @param input [in] 输入数据,复数类型, rows*cols
    @param output [out] fft2d计算结果, same size as input
    @param rows [in] 矩阵行数
    @param cols [in] 矩阵列数
    http://www.fftw.org/fftw3_doc/Complex-Multi_002dDimensional-DFTs.html#Complex-Multi_002dDimensional-DFTs
    void fft2d(const vector<complex<double>>& input, vector<complex<double>>& output, int rows, int cols) {
        fftw_complex *in, *out;
        fftw_plan p;
        in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * input.size());
        out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * rows*cols);
        for (int i = 0; i < input.size(); i++)
            in[i][0] = input.at(i).real();
            in[i][1] = input.at(i).imag();
        p = fftw_plan_dft_2d(rows, cols, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
        fftw_execute(p); /* repeat as needed */
        int size = rows * cols;
        for (int i = 0; i < size; i++)
            output.push_back({ out[i][0], out[i][1] });
        fftw_destroy_plan(p);
        fftw_free(in);
        fftw_free(out);
    2维 FFT
    @param input [in] 输入数据,复数类型, rows*cols
    @param output [out] fft2d计算结果, same size as input
    @param rows [in] 矩阵行数
    @param cols [in] 矩阵列数
    http://www.fftw.org/fftw3_doc/Complex-Multi_002dDimensional-DFTs.html#Complex-Multi_002dDimensional-DFTs
    void fft2d(const vector<complex<float>>& input, vector<complex<float>>& output, int rows, int cols) {
        fftwf_complex *in, *out;
        fftwf_plan p;
        in = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * input.size());
        out = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * rows*cols);
        for (int i = 0; i < input.size(); i++)
            in[i][0] = input.at(i).real();
            in[i][1] = input.at(i).imag();
        p = fftwf_plan_dft_2d(rows, cols, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
        fftwf_execute(p); /* repeat as needed */
        int size = rows * cols;
        for (int i = 0; i < size; i++)
            output.push_back({ out[i][0], out[i][1] });
        fftwf_destroy_plan(p);
        fftwf_free(in);
        fftwf_free(out);
    ********************************************************************************************
    ******************************实数计算FFT/IFFT**********************************************
    计算实数FFT
    @param input [in] data pointer to a float array
    @param output [out] 2 x input_size,
    output[0], [2], [4]... is real, output[1], [3], [5]... is imag
    @param n [in]
    void fft1d(double* input, double* output, int n) {
        double* in = (double*)fftw_malloc(sizeof(double)*n);
        fftw_complex* out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * n);
        memset(output,0, sizeof(double)*n);
        for (int i = 0; i < n; i++)
            in[i] = input[i];
            out[i][0] = 0;
            out[i][1] = 0;
        fftw_plan p = fftw_plan_dft_r2c_1d(n, in, out, FFTW_ESTIMATE);
        //fftw_execute(p);
        fftw_execute_dft_r2c(p, in, out);
        for (int i = 0; i < n; i++)
            output[i * 2] = out[i][0];  //real
            output[i * 2 + 1] = out[i][1];  //imag
        fftw_destroy_plan(p);
        fftw_free(in);
        fftw_free(out);
    计算实数FFT
    @param input [in] data pointer to a float array
    @param output [out] 2 x input_size,
    output[0], [2], [4]... is real, output[1], [3], [5]... is imag
    @param n [in]
    void fft1d(float* input, float* output, int n) {
        float* in = (float*)fftwf_malloc(sizeof(double)*n);
        auto out = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * n);
        memset(output, 0, sizeof(float)*n);
        for (int i = 0; i < n; i++)
            in[i] = input[i];
        auto p = fftwf_plan_dft_r2c_1d(n, in, out, FFTW_ESTIMATE);
        fftwf_execute(p);
        for (int i = 0; i < n; i++)
            output[i * 2] = out[i][0];  //real
            output[i * 2 + 1] = out[i][1];  //imag
        fftwf_destroy_plan(p);
        fftwf_free(in);
        fftwf_free(out);
    void fft2d(double* input, double* output, int rows, int cols){
        double* in = (double*)fftw_malloc(sizeof(double)*rows*cols);
        fftw_complex* out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*rows*cols);
        memset(output, 0, sizeof(double)*rows*cols*2);
        int size = rows * cols;
        for (int i = 0; i < size; i++)
            in[i] = input[i];
        auto p = fftw_plan_dft_r2c_2d(rows, cols, in, out, FFTW_ESTIMATE);
        fftw_execute(p);
        for (int i = 0; i < size; i++)
            output[i] = out[i][0];
            output[i * 2 + 1] = out[i][1];
        fftw_destroy_plan(p);
        fftw_free(in);
        fftw_free(out);
    void fft2d(float* input, float* output, int rows, int cols) {
        float* in = (float*)fftwf_malloc(sizeof(float)*rows*cols);
        fftwf_complex* out = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*rows*cols);
        memset(output, 0, sizeof(float)*rows*cols * 2);
        int size = rows * cols;
        for (int i = 0; i < size; i++)
            in[i] = input[i];
        auto p = fftwf_plan_dft_r2c_2d(rows, cols, in, out, FFTW_ESTIMATE);
        fftwf_execute(p);
        for (int i = 0; i < size; i++)
            output[i] = out[i][0];
            output[i * 2 + 1] = out[i][1];
        fftwf_destroy_plan(p);
        fftwf_free(in);
        fftwf_free(out);
    #if 0
    void fft2d(const vector<complex<float>>& input, vector<complex<float>>& output, int rows, int cols) {
        fftwf_complex* in = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*rows*cols);
        fftwf_complex* out = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*rows*cols);
        int cnt = 0;
        for (auto& i:input)
            in[cnt][0] = i.real();
            in[cnt][1] = i.imag();
        auto p = fftwf_plan_dft_2d(rows, cols, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
        fftwf_execute(p);
        int sz = rows * cols;
        for (int i = 0; i < sz; i++)
            output.push_back({ out[i][0], out[i][1] });
        fftwf_destroy_plan(p);
        fftwf_free(in);
        fftwf_free(out);
    #endif
    

    python

    import numpy as np
    import demo10.fft_demo as fft_demo
    import matplotlib.pyplot as plt
    import cv2
    from scipy.fftpack import fftshift
    x = np.array(range(0, 32), dtype=np.complex)
    var1 = fft_demo.my_fft1d_complex(x)
    print(var1)
    var2 = fft_demo.my_fft1d_complex(np.sin(np.linspace(0, np.pi*4, 32,dtype=np.complex)))
    plt.figure('fft1d_1')
    plt.subplot(1, 2, 1)
    plt.stem(np.real(x))
    plt.subplot(1, 2, 2)
    plt.stem(range(0, 32), np.abs(var1))
    plt.figure('fft1d_2')
    plt.subplot(1, 2, 1)
    plt.stem(np.linspace(0, np.pi*4, 32), np.sin(np.linspace(0, np.pi*4, 32)))
    plt.subplot(1, 2, 2)
    plt.stem(np.linspace(0, np.pi*4, 32), np.abs(var2))
    #plt.show()
    # ---------------------FFT2d-------------------------------------------
    image = cv2.imread('F:\\lena\\lena_gray.jpg', cv2.IMREAD_GRAYSCALE)
    var3 = fft_demo.my_fft2d_img(image)
    var4 = fftshift(var3)
    var5 = np.log(np.abs(var4) + 1)
    plt.figure('fft2d_1')
    plt.subplot(1, 2, 1)
    plt.axis('off')
    plt.imshow(image, cmap=plt.gray())
    plt.subplot(1, 2, 2)
    plt.axis('off')
    plt.imshow(var5, plt.gray())
    plt.axis('off')
    plt.show()