android-GPU与CPU编程:处理时间不一致

android-GPU与CPU编程:处理时间不一致,第1张

概述:我目前正在进行图像跟踪:借助摄像头,我可以跟踪与Android系统交互的手指触摸.图像处理是在带有OpenCL的GPU上完成的:我将相机输出转换为黑白帧,以便获得白色斑点.该方法的处理时间为65ms.由于我的目标是使程序更流畅,因此我使用OpenCV方法在CPU上执行了相同的操作.这样处理时间……

我目前正在进行图像跟踪:由于有了摄像头,我可以跟踪与Android系统交互的手指触摸.图像处理是在带有OpenCL的GPU上完成的:我将相机输出转换为黑白帧,以便获得白色斑点.该方法的处理时间为65ms.
由于我的目标是使程序更流畅,因此我使用OpenCV方法在CPU上执行了相同的操作.这样处理时间为115ms.问题在于,使用OpenCV方法时,程序感觉更加灵敏、速度更快,而我不明白在这种情况下测得的处理时间为什么反而更长:这在我看来是矛盾的.
对于测量,我这样进行:

// Measurement pattern used by the author: sample clock() before and after,
// then convert the tick difference to seconds.
// Note: clock() reports processor time consumed by the program, not elapsed
// wall-clock time.
start = clock();
finish = clock();
double time = (static_cast<double>(finish) - start) / CLOCKS_PER_SEC;
std::cout << "process time : " << time << std::endl;

这是我的代码:

static cv::Mat              original_Right,binary_Right;static cv::Mat              original_left, binary_left;int                 wIDth, height;clock_t                 start,finish;double time = 0.0;wIDth = (int) this->camera_Right.getCapture().get(cv::CAP_PROP_FRAME_WIDTH);height = (int) this->camera_Right.getCapture().get(cv::CAP_PROP_FRAME_HEIGHT);original_Right.create(height, wIDth, CV_8UC3);//--------------------------- Camera 2 ---------------------------------int wIDth_2 = (int) this->camera_left.getCapture().get(cv::CAP_PROP_FRAME_WIDTH);int height_2 = (int) this->camera_left.getCapture().get(cv::CAP_PROP_FRAME_HEIGHT);original_left.create(height_2, wIDth_2, CV_8UC3);binary_Right.create(height, wIDth, CV_32F); // FOR GPUbinary_left.create(height_2, wIDth_2, CV_32F); // FOR GPU//binary_Right.create(height, wIDth, CV_8UC1); // FOR cpu//binary_left.create(height_2, wIDth_2, CV_8UC1); // FOR cpuCore::running_ = true;//------------------------------------ SET UP THE GPU -----------------------------------------cl_context              context;cl_context_propertIEs   propertIEs [3];cl_kernel               kernel;cl_command_queue        command_queue;cl_program              program;cl_int                  err;cl_uint                 num_of_platforms=0;cl_platform_ID          platform_ID;cl_device_ID            device_ID;cl_uint                 num_of_devices=0;cl_mem                  input, output;size_t                  global;int                     data_size =height*wIDth*3;//load opencl sourcefile *fp;char filename[] = "./helloTedKrissV2.cl";char *source_str; //Load the source code containing the kernelfp = fopen(filename, "r");if (!fp) {fprintf(stderr, "Failed to load kernel.\n");exit(1);}source_str = (char*)malloc(MAX_SOURCE_SIZE);global = fread(source_str, 1, MAX_SOURCE_SIZE, fp);fclose(fp);//retreives a List of platforms availableif(clGetPlatformIDs(1,&platform_ID, &num_of_platforms)!=CL_SUCCESS){    std::cout<<"unable to get a platform_ID"<<std::endl;};// 
to get a supported GPU deviceif(clGetdeviceids(platform_ID,CL_DEVICE_TYPE_GPU,1,&device_ID, &num_of_devices)!= CL_SUCCESS){    std::cout<<"unable to get a device_ID"<<std::endl;      };//context propertIEs List - must be terminated with 0propertIEs[0]=CL_CONTEXT_PLATFORM;propertIEs[1]=(cl_context_propertIEs) platform_ID;propertIEs[2]=0;// create a context with the gpu devicecontext = clCreateContext(propertIEs,1,&device_ID,NulL,NulL,&err);//create command queue using the context and devicecommand_queue = clCreateCommandQueue(context,device_ID,0,&err);//create a program from the kernel source codeprogram= clCreateProgramWithSource(context,1,(const char **) &source_str, NulL,&err);// compile the programif(clBuildProgram(program,0,NulL,NulL,NulL,NulL)!=CL_SUCCESS){    size_t length;    std::cout<<"Error building program"<<std::endl;    char buffer[4096];    clGetProgramBuildInfo(program,device_ID,CL_PROGRAM_BUILD_LOG, sizeof(buffer),buffer,&length);    std::cout<< buffer <<std::endl;}//specify which kernel from the program to executekernel = clCreateKernel(program,"imageProcessing",&err);while (this->isRunning() == true) {     start= clock(); //--------------------- START----------------------    //----------------------FRAME---------------------    this->camera_Right.readFrame(original_Right);    if (original_Right.empty() == true ) {        std::cerr << "[Core/Error] Original  frame is empty." << std::endl;        break;    }    this->camera_left.readFrame(original_left);    if (original_left.empty() == true ) {        std::cerr << "[Core/Error] Original 2  frame is empty." 
<< std::endl;        break;    }    //----------------------FRAME---------------------  //------------------------------------------------IMP GPU ------------------------------------------------------    input = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR  , sizeof(unsigned char)*data_size,NulL,NulL);    output =clCreateBuffer(context,CL_MEM_READ_WRITE   | CL_MEM_ALLOC_HOST_PTR, sizeof(float)*data_size/3,NulL,NulL);   if(clEnqueueWriteBuffer(command_queue,input,CL_TRUE,0,sizeof(unsigned char)*data_size, original_Right.data ,0,NulL,NulL )!= CL_SUCCESS){};    //set the argument List for the kernel command    clSetKernelArg(kernel,0,sizeof(cl_mem), &input);    clSetKernelArg(kernel,1,sizeof(cl_mem), &output);    global = data_size  ;    //enqueue the kernel command for execution    clEnqueueNDRangeKernel(command_queue, kernel, 1, NulL, &global, NulL,0,NulL,NulL);    clFinish(command_queue);    //copy the results from out of the  output buffer    if(clEnqueueReadBuffer(command_queue,output,CL_TRUE ,0,sizeof(float)*data_size/3,binary_Right.data,0,NulL,NulL )!= CL_SUCCESS){};    clReleaseMemObject(input);    clReleaseMemObject(output);    //------------------------------------------------IMP GPU ------------------------------------------------------    input = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR  , sizeof(unsigned char)*data_size,NulL,NulL);    output =clCreateBuffer(context,CL_MEM_READ_WRITE   | CL_MEM_ALLOC_HOST_PTR, sizeof(float)*data_size/3,NulL,NulL);   if(clEnqueueWriteBuffer(command_queue,input,CL_TRUE,0,sizeof(unsigned char)*data_size, original_left.data ,0,NulL,NulL )!= CL_SUCCESS){};    //set the argument List for the kernel command    clSetKernelArg(kernel,0,sizeof(cl_mem), &input);    clSetKernelArg(kernel,1,sizeof(cl_mem), &output);    global = data_size  ;    //enqueue the kernel command for execution    clEnqueueNDRangeKernel(command_queue, kernel, 1, NulL, &global, NulL,0,NulL,NulL);    
clFinish(command_queue);    //copy the results from out of the  output buffer    if(clEnqueueReadBuffer(command_queue,output,CL_TRUE ,0,sizeof(float)*data_size/3,binary_left.data,0,NulL,NulL )!= CL_SUCCESS){};   clReleaseMemObject(input);   clReleaseMemObject(output);    //------------------------------------------------IMP GPU ------------------------------------------------------  // cpu METHOD  // adok::processing::doImageProcessing(original_Right, binary_Right);  // adok::processing::doImageProcessing(original_left, binary_left);    //-------------------------------------------------------------- TRACKING ------------------------------------------------------adok::tracking::doFingerContoursTracking(binary_Right,binary_left, this->fingerContours, this->perspective_Right,this->perspective_left, this->distortion_Right,this->distortion_left, this);    //------------------------------------------- TRACKING ----------------------------------------- //------------------------------SEND COORDINATES TO ANDROID BOARD--------------------if (getSIDeRight() && !getSIDeleft() ) {        std::cout<<"RIGHT : "<<std::endl;        this->uart_.sendAll(this->fingerContours, this->perspective_Right.getPerspectiveMatrix(), RIGHT);    }else if (!getSIDeRight() && getSIDeleft() ){        std::cout<<"left : "<<std::endl;        this->uart_.sendAll(this->fingerContours, this->perspective_left.getPerspectiveMatrix(), left);    }else if (getSIDeRight() && getSIDeleft() ){        std::cout<<"RIGHT & left : "<<std::endl;        this->uart_.sendAll(this->fingerContours, this->perspective_Right.getPerspectiveMatrix(), this->perspective_left.getPerspectiveMatrix());    }this->setSIDeRight(0);this->setSIDeleft(0);finish = clock();time =(double)(finish - start)/CLOCKS_PER_SEC;std::cout << "Time: " << time << std::endl; // ------------END-----------}clReleaseCommandQueue(command_queue);clReleaseProgram(program);clReleaseKernel(kernel);clReleaseContext(context);this->stop();

}

还有一点很奇怪:使用CPU方案时,抓取一帧耗时5毫秒,而使用GPU方案时,抓取一帧耗时15毫秒,我不知道为什么会增加.

另外,我使用的开发板是ODROID-XU4.

解决方法:

GPU计算有时可能比CPU计算花费更多时间.因为在GPU计算中,主进程需要先把数据发送到GPU内存,GPU完成数学计算之后,再把数据发送回CPU.因此,数据的传输以及结果传回CPU都需要时间.如果要处理的缓冲区较大、传输时间较长,那么GPU计算可能会花费更多时间.cuDNN库配合GPU处理器使用可以使速度快很多倍.因此,如果您的程序未使用cuDNN,速度可能会更慢.

总结

以上是内存溢出为你收集整理的android-GPU与CPU编程:处理时间不一致全部内容,希望文章能够帮你解决android-GPU与CPU编程:处理时间不一致所遇到的程序开发问题。

如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/web/1120857.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-05-29
下一篇 2022-05-29

发表评论

登录后才能评论

评论列表(0条)

保存