pkgsrc-Changes archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

CVS commit: pkgsrc/misc/libreoffice



Module Name:    pkgsrc
Committed By:   joerg
Date:           Mon Apr 20 13:00:48 UTC 2020

Modified Files:
        pkgsrc/misc/libreoffice: distinfo
Added Files:
        pkgsrc/misc/libreoffice/patches:
            patch-sc_source_core_opencl_formulagroupcl.cxx

Log Message:
Declare classes before dyncasting them


To generate a diff of this commit:
cvs rdiff -u -r1.97 -r1.98 pkgsrc/misc/libreoffice/distinfo
cvs rdiff -u -r0 -r1.1 \
    pkgsrc/misc/libreoffice/patches/patch-sc_source_core_opencl_formulagroupcl.cxx

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: pkgsrc/misc/libreoffice/distinfo
diff -u pkgsrc/misc/libreoffice/distinfo:1.97 pkgsrc/misc/libreoffice/distinfo:1.98
--- pkgsrc/misc/libreoffice/distinfo:1.97       Sat Apr 18 04:18:05 2020
+++ pkgsrc/misc/libreoffice/distinfo    Mon Apr 20 13:00:48 2020
@@ -1,4 +1,4 @@
-$NetBSD: distinfo,v 1.97 2020/04/18 04:18:05 ryoon Exp $
+$NetBSD: distinfo,v 1.98 2020/04/20 13:00:48 joerg Exp $
 
 SHA1 (libreoffice/0168229624cfac409e766913506961a8-ucpp-1.3.2.tar.gz) = 452eba922e4f41603539c9dc39947d2271e47093
 RMD160 (libreoffice/0168229624cfac409e766913506961a8-ucpp-1.3.2.tar.gz) = dbeb7a7f8c89961ca2e544b810345d025561866b
@@ -235,6 +235,7 @@ SHA1 (patch-external_postgresql_External
 SHA1 (patch-i18nutil_source_utility_unicode.cxx) = 014d48574a379f92f2d0afe3c91d2c2956708dd8
 SHA1 (patch-sal_osl_unx_system.hxx) = 478dd47dc512d283a554600951010bbaa10c2c0e
 SHA1 (patch-sal_rtl_cipher.cxx) = e5e46d0b96e25572be86bf26f0053436859736cd
+SHA1 (patch-sc_source_core_opencl_formulagroupcl.cxx) = 1a589c8190a28f603671faf1d84cac022425849e
 SHA1 (patch-sc_source_core_tool_math.cxx) = dd665c6bbe3c18fca0b5fbf847a6ec1ff50d0bb9
 SHA1 (patch-solenv_gbuild_Module.mk) = cbd6ca3acae187458e49fe76d973e6475ed5fe1d
 SHA1 (patch-solenv_gbuild_platform_NETBSD_AARCH64_GCC.mk) = eda8dbd0e9394b2dde2f98c0df39e7e9888f49dc

Added files:

Index: pkgsrc/misc/libreoffice/patches/patch-sc_source_core_opencl_formulagroupcl.cxx
diff -u /dev/null pkgsrc/misc/libreoffice/patches/patch-sc_source_core_opencl_formulagroupcl.cxx:1.1
--- /dev/null   Mon Apr 20 13:00:49 2020
+++ pkgsrc/misc/libreoffice/patches/patch-sc_source_core_opencl_formulagroupcl.cxx      Mon Apr 20 13:00:48 2020
@@ -0,0 +1,798 @@
+$NetBSD: patch-sc_source_core_opencl_formulagroupcl.cxx,v 1.1 2020/04/20 13:00:48 joerg Exp $
+
+--- sc/source/core/opencl/formulagroupcl.cxx.orig      2020-04-20 00:48:10.479759827 +0000
++++ sc/source/core/opencl/formulagroupcl.cxx
+@@ -1335,185 +1335,7 @@ public:
+     }
+ 
+     /// Emit the definition for the auxiliary reduction kernel
+-    virtual void GenSlidingWindowFunction( std::stringstream& ss )
+-    {
+-        if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
+-        {
+-            std::string name = Base::GetName();
+-            ss << "__kernel void " << name;
+-            ss << "_reduction(__global double* A, "
+-                "__global double *result,int arrayLength,int windowSize){\n";
+-            ss << "    double tmp, current_result =" <<
+-                mpCodeGen->GetBottom();
+-            ss << ";\n";
+-            ss << "    int writePos = get_group_id(1);\n";
+-            ss << "    int lidx = get_local_id(0);\n";
+-            ss << "    __local double shm_buf[256];\n";
+-            if (mpDVR->IsStartFixed())
+-                ss << "    int offset = 0;\n";
+-            else // if (!mpDVR->IsStartFixed())
+-                ss << "    int offset = get_group_id(1);\n";
+-            if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize;\n";
+-            else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+-                ss << "    int end = offset + windowSize;\n";
+-            else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize + get_group_id(1);\n";
+-            else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize;\n";
+-            ss << "    end = min(end, arrayLength);\n";
+-
+-            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    int loop = arrayLength/512 + 1;\n";
+-            ss << "    for (int l=0; l<loop; l++){\n";
+-            ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
+-            ss << "    int loopOffset = l*512;\n";
+-            ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
+-            ss << "        tmp = legalize(" << mpCodeGen->Gen2(
+-                "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
+-            ss << "        tmp = legalize(" << mpCodeGen->Gen2(
+-                "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
+-            ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
+-            ss << "        tmp = legalize(" << mpCodeGen->Gen2(
+-                "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
+-            ss << "    shm_buf[lidx] = tmp;\n";
+-            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    for (int i = 128; i >0; i/=2) {\n";
+-            ss << "        if (lidx < i)\n";
+-            ss << "            shm_buf[lidx] = ";
+-            // Special case count
+-            if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+-                ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+-            else
+-                ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
+-            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    }\n";
+-            ss << "        if (lidx == 0)\n";
+-            ss << "            current_result =";
+-            if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+-                ss << "current_result + shm_buf[0]";
+-            else
+-                ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
+-            ss << ";\n";
+-            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    }\n";
+-            ss << "    if (lidx == 0)\n";
+-            ss << "        result[writePos] = current_result;\n";
+-            ss << "}\n";
+-        }
+-        else
+-        {
+-            std::string name = Base::GetName();
+-            /*sum reduction*/
+-            ss << "__kernel void " << name << "_sum";
+-            ss << "_reduction(__global double* A, "
+-                "__global double *result,int arrayLength,int windowSize){\n";
+-            ss << "    double tmp, current_result =" <<
+-                mpCodeGen->GetBottom();
+-            ss << ";\n";
+-            ss << "    int writePos = get_group_id(1);\n";
+-            ss << "    int lidx = get_local_id(0);\n";
+-            ss << "    __local double shm_buf[256];\n";
+-            if (mpDVR->IsStartFixed())
+-                ss << "    int offset = 0;\n";
+-            else // if (!mpDVR->IsStartFixed())
+-                ss << "    int offset = get_group_id(1);\n";
+-            if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize;\n";
+-            else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+-                ss << "    int end = offset + windowSize;\n";
+-            else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize + get_group_id(1);\n";
+-            else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize;\n";
+-            ss << "    end = min(end, arrayLength);\n";
+-            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    int loop = arrayLength/512 + 1;\n";
+-            ss << "    for (int l=0; l<loop; l++){\n";
+-            ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
+-            ss << "    int loopOffset = l*512;\n";
+-            ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
+-            ss << "        tmp = legalize(";
+-            ss << "(A[loopOffset + lidx + offset]+ tmp)";
+-            ss << ", tmp);\n";
+-            ss << "        tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
+-            ss << ", tmp);\n";
+-            ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
+-            ss << "        tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
+-            ss << ", tmp);\n";
+-            ss << "    shm_buf[lidx] = tmp;\n";
+-            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    for (int i = 128; i >0; i/=2) {\n";
+-            ss << "        if (lidx < i)\n";
+-            ss << "            shm_buf[lidx] = ";
+-            ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+-            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    }\n";
+-            ss << "        if (lidx == 0)\n";
+-            ss << "            current_result =";
+-            ss << "current_result + shm_buf[0]";
+-            ss << ";\n";
+-            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    }\n";
+-            ss << "    if (lidx == 0)\n";
+-            ss << "        result[writePos] = current_result;\n";
+-            ss << "}\n";
+-            /*count reduction*/
+-            ss << "__kernel void " << name << "_count";
+-            ss << "_reduction(__global double* A, "
+-                "__global double *result,int arrayLength,int windowSize){\n";
+-            ss << "    double tmp, current_result =" <<
+-                mpCodeGen->GetBottom();
+-            ss << ";\n";
+-            ss << "    int writePos = get_group_id(1);\n";
+-            ss << "    int lidx = get_local_id(0);\n";
+-            ss << "    __local double shm_buf[256];\n";
+-            if (mpDVR->IsStartFixed())
+-                ss << "    int offset = 0;\n";
+-            else // if (!mpDVR->IsStartFixed())
+-                ss << "    int offset = get_group_id(1);\n";
+-            if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize;\n";
+-            else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+-                ss << "    int end = offset + windowSize;\n";
+-            else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize + get_group_id(1);\n";
+-            else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
+-                ss << "    int end = windowSize;\n";
+-            ss << "    end = min(end, arrayLength);\n";
+-            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    int loop = arrayLength/512 + 1;\n";
+-            ss << "    for (int l=0; l<loop; l++){\n";
+-            ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
+-            ss << "    int loopOffset = l*512;\n";
+-            ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
+-            ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
+-            ss << ", tmp);\n";
+-            ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
+-            ss << ", tmp);\n";
+-            ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
+-            ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
+-            ss << ", tmp);\n";
+-            ss << "    shm_buf[lidx] = tmp;\n";
+-            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    for (int i = 128; i >0; i/=2) {\n";
+-            ss << "        if (lidx < i)\n";
+-            ss << "            shm_buf[lidx] = ";
+-            ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
+-            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    }\n";
+-            ss << "        if (lidx == 0)\n";
+-            ss << "            current_result =";
+-            ss << "current_result + shm_buf[0];";
+-            ss << ";\n";
+-            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
+-            ss << "    }\n";
+-            ss << "    if (lidx == 0)\n";
+-            ss << "        result[writePos] = current_result;\n";
+-            ss << "}\n";
+-        }
+-
+-    }
++    virtual void GenSlidingWindowFunction( std::stringstream& ss );
+ 
+     virtual std::string GenSlidingWindowDeclRef( bool ) const
+     {
+@@ -1527,213 +1349,28 @@ public:
+ 
+     /// Controls how the elements in the DoubleVectorRef are traversed
+     size_t GenReductionLoopHeader(
+-        std::stringstream& ss, int nResultSize, bool& needBody )
++        std::stringstream& ss, int nResultSize, bool& needBody );
++
++    virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram );
++
++    ~ParallelReductionVectorRef()
+     {
+-        assert(mpDVR);
+-        size_t nCurWindowSize = mpDVR->GetRefRowSize();
+-        std::string temp = Base::GetName() + "[gid0]";
+-        ss << "tmp = ";
+-        // Special case count
+-        if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
++        if (mpClmem2)
+         {
+-            ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
+-            ss << "nCount = nCount-1;\n";
+-            ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
+-            ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n";
++            cl_int err;
++            err = clReleaseMemObject(mpClmem2);
++            SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
++            mpClmem2 = nullptr;
+         }
+-        else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
+-            ss << temp << "+ tmp";
+-        else
+-            ss << mpCodeGen->Gen2(temp, "tmp");
+-        ss << ";\n\t";
+-        needBody = false;
+-        return nCurWindowSize;
+     }
+ 
+-    virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
+-    {
+-        assert(Base::mpClmem == nullptr);
+-
+-        openclwrapper::KernelEnv kEnv;
+-        openclwrapper::setKernelEnv(&kEnv);
+-        cl_int err;
+-        size_t nInput = mpDVR->GetArrayLength();
+-        size_t nCurWindowSize = mpDVR->GetRefRowSize();
+-        // create clmem buffer
+-        if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr)
+-            throw Unhandled(__FILE__, __LINE__);
+-        double* pHostBuffer = const_cast<double*>(
+-            mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
+-        size_t szHostBuffer = nInput * sizeof(double);
+-        Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
+-            cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
+-            szHostBuffer,
+-            pHostBuffer, &err);
+-        SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
+-
+-        mpClmem2 = clCreateBuffer(kEnv.mpkContext,
+-            CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+-            sizeof(double) * w, nullptr, nullptr);
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+-        SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
++    size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
+ 
+-        // reproduce the reduction function name
+-        std::string kernelName;
+-        if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
+-            kernelName = Base::GetName() + "_reduction";
+-        else
+-            kernelName = Base::GetName() + "_sum_reduction";
+-        cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
+-        if (err != CL_SUCCESS)
+-            throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
+-        SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
++    size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
+ 
+-        // set kernel arg of reduction kernel
+-        // TODO(Wei Wei): use unique name for kernel
+-        cl_mem buf = Base::GetCLBuffer();
+-        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
+-        err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
+-            static_cast<void*>(&buf));
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++    bool GetStartFixed() const { return bIsStartFixed; }
+ 
+-        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
+-        err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
+-        err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
+-        err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-        // set work group size and execute
+-        size_t global_work_size[] = { 256, static_cast<size_t>(w) };
+-        size_t const local_work_size[] = { 256, 1 };
+-        SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
+-        err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
+-            global_work_size, local_work_size, 0, nullptr, nullptr);
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
+-        err = clFinish(kEnv.mpkCmdQueue);
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clFinish", err, __FILE__, __LINE__);
+-        if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
+-        {
+-            /*average need more reduction kernel for count computing*/
+-            std::unique_ptr<double[]> pAllBuffer(new double[2 * w]);
+-            double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
+-                mpClmem2,
+-                CL_TRUE, CL_MAP_READ, 0,
+-                sizeof(double) * w, 0, nullptr, nullptr,
+-                &err));
+-            if (err != CL_SUCCESS)
+-                throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
+-
+-            for (int i = 0; i < w; i++)
+-                pAllBuffer[i] = resbuf[i];
+-            err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
+-            if (err != CL_SUCCESS)
+-                throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
+-
+-            kernelName = Base::GetName() + "_count_reduction";
+-            redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
+-            if (err != CL_SUCCESS)
+-                throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
+-            SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
+-
+-            // set kernel arg of reduction kernel
+-            buf = Base::GetCLBuffer();
+-            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
+-            err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
+-                static_cast<void*>(&buf));
+-            if (CL_SUCCESS != err)
+-                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
+-            err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
+-            if (CL_SUCCESS != err)
+-                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
+-            err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
+-            if (CL_SUCCESS != err)
+-                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
+-            err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
+-            if (CL_SUCCESS != err)
+-                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-
+-            // set work group size and execute
+-            size_t global_work_size1[] = { 256, static_cast<size_t>(w) };
+-            size_t const local_work_size1[] = { 256, 1 };
+-            SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
+-            err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
+-                global_work_size1, local_work_size1, 0, nullptr, nullptr);
+-            if (CL_SUCCESS != err)
+-                throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
+-            err = clFinish(kEnv.mpkCmdQueue);
+-            if (CL_SUCCESS != err)
+-                throw OpenCLError("clFinish", err, __FILE__, __LINE__);
+-            resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
+-                mpClmem2,
+-                CL_TRUE, CL_MAP_READ, 0,
+-                sizeof(double) * w, 0, nullptr, nullptr,
+-                &err));
+-            if (err != CL_SUCCESS)
+-                throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
+-            for (int i = 0; i < w; i++)
+-                pAllBuffer[i + w] = resbuf[i];
+-            err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
+-            // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
+-            if (CL_SUCCESS != err)
+-                SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
+-            if (mpClmem2)
+-            {
+-                err = clReleaseMemObject(mpClmem2);
+-                SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
+-                mpClmem2 = nullptr;
+-            }
+-            mpClmem2 = clCreateBuffer(kEnv.mpkContext,
+-                cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
+-                w * sizeof(double) * 2, pAllBuffer.get(), &err);
+-            if (CL_SUCCESS != err)
+-                throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
+-            SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
+-        }
+-        // set kernel arg
+-        SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
+-        err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2);
+-        if (CL_SUCCESS != err)
+-            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
+-        return 1;
+-    }
+-
+-    ~ParallelReductionVectorRef()
+-    {
+-        if (mpClmem2)
+-        {
+-            cl_int err;
+-            err = clReleaseMemObject(mpClmem2);
+-            SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
+-            mpClmem2 = nullptr;
+-        }
+-    }
+-
+-    size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
+-
+-    size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
+-
+-    bool GetStartFixed() const { return bIsStartFixed; }
+-
+-    bool GetEndFixed() const { return bIsEndFixed; }
++    bool GetEndFixed() const { return bIsEndFixed; }
+ 
+ protected:
+     bool bIsStartFixed, bIsEndFixed;
+@@ -4395,6 +4032,378 @@ bool FormulaGroupInterpreterOpenCL::inte
+     return aRes.pushResultToDocument(rDoc, rTopPos);
+ }
+ 
++template<class Base>
++void ParallelReductionVectorRef<Base>::GenSlidingWindowFunction( std::stringstream& ss )
++{
++        if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
++        {
++            std::string name = Base::GetName();
++            ss << "__kernel void " << name;
++            ss << "_reduction(__global double* A, "
++                "__global double *result,int arrayLength,int windowSize){\n";
++            ss << "    double tmp, current_result =" <<
++                mpCodeGen->GetBottom();
++            ss << ";\n";
++            ss << "    int writePos = get_group_id(1);\n";
++            ss << "    int lidx = get_local_id(0);\n";
++            ss << "    __local double shm_buf[256];\n";
++            if (mpDVR->IsStartFixed())
++                ss << "    int offset = 0;\n";
++            else // if (!mpDVR->IsStartFixed())
++                ss << "    int offset = get_group_id(1);\n";
++            if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++                ss << "    int end = windowSize;\n";
++            else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++                ss << "    int end = offset + windowSize;\n";
++            else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++                ss << "    int end = windowSize + get_group_id(1);\n";
++            else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++                ss << "    int end = windowSize;\n";
++            ss << "    end = min(end, arrayLength);\n";
++
++            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
++            ss << "    int loop = arrayLength/512 + 1;\n";
++            ss << "    for (int l=0; l<loop; l++){\n";
++            ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
++            ss << "    int loopOffset = l*512;\n";
++            ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
++            ss << "        tmp = legalize(" << mpCodeGen->Gen2(
++                "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
++            ss << "        tmp = legalize(" << mpCodeGen->Gen2(
++                "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
++            ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
++            ss << "        tmp = legalize(" << mpCodeGen->Gen2(
++                "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
++            ss << "    shm_buf[lidx] = tmp;\n";
++            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
++            ss << "    for (int i = 128; i >0; i/=2) {\n";
++            ss << "        if (lidx < i)\n";
++            ss << "            shm_buf[lidx] = ";
++            // Special case count
++            if (dynamic_cast<OpCount*>(mpCodeGen.get()))
++                ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
++            else
++                ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
++            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
++            ss << "    }\n";
++            ss << "        if (lidx == 0)\n";
++            ss << "            current_result =";
++            if (dynamic_cast<OpCount*>(mpCodeGen.get()))
++                ss << "current_result + shm_buf[0]";
++            else
++                ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
++            ss << ";\n";
++            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
++            ss << "    }\n";
++            ss << "    if (lidx == 0)\n";
++            ss << "        result[writePos] = current_result;\n";
++            ss << "}\n";
++        }
++        else
++        {
++            std::string name = Base::GetName();
++            /*sum reduction*/
++            ss << "__kernel void " << name << "_sum";
++            ss << "_reduction(__global double* A, "
++                "__global double *result,int arrayLength,int windowSize){\n";
++            ss << "    double tmp, current_result =" <<
++                mpCodeGen->GetBottom();
++            ss << ";\n";
++            ss << "    int writePos = get_group_id(1);\n";
++            ss << "    int lidx = get_local_id(0);\n";
++            ss << "    __local double shm_buf[256];\n";
++            if (mpDVR->IsStartFixed())
++                ss << "    int offset = 0;\n";
++            else // if (!mpDVR->IsStartFixed())
++                ss << "    int offset = get_group_id(1);\n";
++            if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++                ss << "    int end = windowSize;\n";
++            else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++                ss << "    int end = offset + windowSize;\n";
++            else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++                ss << "    int end = windowSize + get_group_id(1);\n";
++            else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++                ss << "    int end = windowSize;\n";
++            ss << "    end = min(end, arrayLength);\n";
++            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
++            ss << "    int loop = arrayLength/512 + 1;\n";
++            ss << "    for (int l=0; l<loop; l++){\n";
++            ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
++            ss << "    int loopOffset = l*512;\n";
++            ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
++            ss << "        tmp = legalize(";
++            ss << "(A[loopOffset + lidx + offset]+ tmp)";
++            ss << ", tmp);\n";
++            ss << "        tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
++            ss << ", tmp);\n";
++            ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
++            ss << "        tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
++            ss << ", tmp);\n";
++            ss << "    shm_buf[lidx] = tmp;\n";
++            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
++            ss << "    for (int i = 128; i >0; i/=2) {\n";
++            ss << "        if (lidx < i)\n";
++            ss << "            shm_buf[lidx] = ";
++            ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
++            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
++            ss << "    }\n";
++            ss << "        if (lidx == 0)\n";
++            ss << "            current_result =";
++            ss << "current_result + shm_buf[0]";
++            ss << ";\n";
++            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
++            ss << "    }\n";
++            ss << "    if (lidx == 0)\n";
++            ss << "        result[writePos] = current_result;\n";
++            ss << "}\n";
++            /*count reduction*/
++            ss << "__kernel void " << name << "_count";
++            ss << "_reduction(__global double* A, "
++                "__global double *result,int arrayLength,int windowSize){\n";
++            ss << "    double tmp, current_result =" <<
++                mpCodeGen->GetBottom();
++            ss << ";\n";
++            ss << "    int writePos = get_group_id(1);\n";
++            ss << "    int lidx = get_local_id(0);\n";
++            ss << "    __local double shm_buf[256];\n";
++            if (mpDVR->IsStartFixed())
++                ss << "    int offset = 0;\n";
++            else // if (!mpDVR->IsStartFixed())
++                ss << "    int offset = get_group_id(1);\n";
++            if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++                ss << "    int end = windowSize;\n";
++            else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++                ss << "    int end = offset + windowSize;\n";
++            else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
++                ss << "    int end = windowSize + get_group_id(1);\n";
++            else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
++                ss << "    int end = windowSize;\n";
++            ss << "    end = min(end, arrayLength);\n";
++            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
++            ss << "    int loop = arrayLength/512 + 1;\n";
++            ss << "    for (int l=0; l<loop; l++){\n";
++            ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
++            ss << "    int loopOffset = l*512;\n";
++            ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
++            ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
++            ss << ", tmp);\n";
++            ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
++            ss << ", tmp);\n";
++            ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
++            ss << "        tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
++            ss << ", tmp);\n";
++            ss << "    shm_buf[lidx] = tmp;\n";
++            ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
++            ss << "    for (int i = 128; i >0; i/=2) {\n";
++            ss << "        if (lidx < i)\n";
++            ss << "            shm_buf[lidx] = ";
++            ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
++            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
++            ss << "    }\n";
++            ss << "        if (lidx == 0)\n";
++            ss << "            current_result =";
++            ss << "current_result + shm_buf[0];";
++            ss << ";\n";
++            ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
++            ss << "    }\n";
++            ss << "    if (lidx == 0)\n";
++            ss << "        result[writePos] = current_result;\n";
++            ss << "}\n";
++        }
++}
++
++template<class Base>
++size_t ParallelReductionVectorRef<Base>::GenReductionLoopHeader(
++        std::stringstream& ss, int nResultSize, bool& needBody )
++    {
++        assert(mpDVR);
++        size_t nCurWindowSize = mpDVR->GetRefRowSize();
++        std::string temp = Base::GetName() + "[gid0]";
++        ss << "tmp = ";
++        // Special case count
++        if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
++        {
++            ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
++            ss << "nCount = nCount-1;\n";
++            ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
++            ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n";
++        }
++        else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
++            ss << temp << "+ tmp";
++        else
++            ss << mpCodeGen->Gen2(temp, "tmp");
++        ss << ";\n\t";
++        needBody = false;
++        return nCurWindowSize;
++    }
++
++template<class Base>
++size_t ParallelReductionVectorRef<Base>::Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
++    {
++        assert(Base::mpClmem == nullptr);
++
++        openclwrapper::KernelEnv kEnv;
++        openclwrapper::setKernelEnv(&kEnv);
++        cl_int err;
++        size_t nInput = mpDVR->GetArrayLength();
++        size_t nCurWindowSize = mpDVR->GetRefRowSize();
++        // create clmem buffer
++        if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr)
++            throw Unhandled(__FILE__, __LINE__);
++        double* pHostBuffer = const_cast<double*>(
++            mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
++        size_t szHostBuffer = nInput * sizeof(double);
++        Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
++            cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
++            szHostBuffer,
++            pHostBuffer, &err);
++        SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
++
++        mpClmem2 = clCreateBuffer(kEnv.mpkContext,
++            CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
++            sizeof(double) * w, nullptr, nullptr);
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
++        SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
++
++        // reproduce the reduction function name
++        std::string kernelName;
++        if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
++            kernelName = Base::GetName() + "_reduction";
++        else
++            kernelName = Base::GetName() + "_sum_reduction";
++        cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
++        if (err != CL_SUCCESS)
++            throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
++        SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
++
++        // set kernel arg of reduction kernel
++        // TODO(Wei Wei): use unique name for kernel
++        cl_mem buf = Base::GetCLBuffer();
++        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
++        err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
++            static_cast<void*>(&buf));
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
++        err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
++        err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++        SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
++        err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++        // set work group size and execute
++        size_t global_work_size[] = { 256, static_cast<size_t>(w) };
++        size_t const local_work_size[] = { 256, 1 };
++        SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
++        err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
++            global_work_size, local_work_size, 0, nullptr, nullptr);
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
++        err = clFinish(kEnv.mpkCmdQueue);
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clFinish", err, __FILE__, __LINE__);
++        if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
++        {
++            /*average need more reduction kernel for count computing*/
++            std::unique_ptr<double[]> pAllBuffer(new double[2 * w]);
++            double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
++                mpClmem2,
++                CL_TRUE, CL_MAP_READ, 0,
++                sizeof(double) * w, 0, nullptr, nullptr,
++                &err));
++            if (err != CL_SUCCESS)
++                throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
++
++            for (int i = 0; i < w; i++)
++                pAllBuffer[i] = resbuf[i];
++            err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
++            if (err != CL_SUCCESS)
++                throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
++
++            kernelName = Base::GetName() + "_count_reduction";
++            redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
++            if (err != CL_SUCCESS)
++                throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
++            SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
++
++            // set kernel arg of reduction kernel
++            buf = Base::GetCLBuffer();
++            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
++            err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
++                static_cast<void*>(&buf));
++            if (CL_SUCCESS != err)
++                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
++            err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
++            if (CL_SUCCESS != err)
++                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
++            err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
++            if (CL_SUCCESS != err)
++                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++            SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
++            err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
++            if (CL_SUCCESS != err)
++                throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++
++            // set work group size and execute
++            size_t global_work_size1[] = { 256, static_cast<size_t>(w) };
++            size_t const local_work_size1[] = { 256, 1 };
++            SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
++            err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
++                global_work_size1, local_work_size1, 0, nullptr, nullptr);
++            if (CL_SUCCESS != err)
++                throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
++            err = clFinish(kEnv.mpkCmdQueue);
++            if (CL_SUCCESS != err)
++                throw OpenCLError("clFinish", err, __FILE__, __LINE__);
++            resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
++                mpClmem2,
++                CL_TRUE, CL_MAP_READ, 0,
++                sizeof(double) * w, 0, nullptr, nullptr,
++                &err));
++            if (err != CL_SUCCESS)
++                throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
++            for (int i = 0; i < w; i++)
++                pAllBuffer[i + w] = resbuf[i];
++            err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
++            // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
++            if (CL_SUCCESS != err)
++                SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
++            if (mpClmem2)
++            {
++                err = clReleaseMemObject(mpClmem2);
++                SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
++                mpClmem2 = nullptr;
++            }
++            mpClmem2 = clCreateBuffer(kEnv.mpkContext,
++                cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
++                w * sizeof(double) * 2, pAllBuffer.get(), &err);
++            if (CL_SUCCESS != err)
++                throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
++            SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
++        }
++        // set kernel arg
++        SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
++        err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2);
++        if (CL_SUCCESS != err)
++            throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
++        return 1;
++    }
++
+ }} // namespace sc::opencl
+ 
+ /* vim:set shiftwidth=4 softtabstop=4 expandtab: */



Home | Main Index | Thread Index | Old Index