Skip to content

Commit

Permalink
*refactoring of SSE4.1 optimizations of SynetMergedConvolution32f.
Browse files Browse the repository at this point in the history
  • Loading branch information
ermig1979 committed Oct 14, 2024
1 parent ee2ffcb commit 690e34e
Show file tree
Hide file tree
Showing 14 changed files with 1,737 additions and 2,483 deletions.
7 changes: 4 additions & 3 deletions prj/vs2019/Sse41.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,10 @@
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution16bDepthwise.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution16bInput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution16bOutput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fCd.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fCdc.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fDc.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32f.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fDepthwise.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fInput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fOutput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution8i.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution8iDepthwise.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution8iInput.cpp" />
Expand Down
21 changes: 12 additions & 9 deletions prj/vs2019/Sse41.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -265,15 +265,6 @@
<ClCompile Include="..\..\src\Simd\SimdSse41SynetConvolution32fNhwcDirect3r.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fCd.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fCdc.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fDc.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetInnerProduct8i.cpp">
<Filter>Sse41</Filter>
</ClCompile>
Expand Down Expand Up @@ -418,6 +409,18 @@
<ClCompile Include="..\..\src\Simd\SimdSse41SynetConvolution16bNhwcDeptwise.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32f.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fDepthwise.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fInput.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fOutput.cpp">
<Filter>Sse41</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="Sse41">
Expand Down
7 changes: 4 additions & 3 deletions prj/vs2022/Sse41.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,10 @@
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution16bDepthwise.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution16bInput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution16bOutput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fCd.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fCdc.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fDc.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32f.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fDepthwise.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fInput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fOutput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution8i.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution8iDepthwise.cpp" />
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution8iInput.cpp" />
Expand Down
21 changes: 12 additions & 9 deletions prj/vs2022/Sse41.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -265,15 +265,6 @@
<ClCompile Include="..\..\src\Simd\SimdSse41SynetConvolution32fNhwcDirect3r.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fCd.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fCdc.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fDc.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetInnerProduct8i.cpp">
<Filter>Sse41</Filter>
</ClCompile>
Expand Down Expand Up @@ -418,6 +409,18 @@
<ClCompile Include="..\..\src\Simd\SimdSse41SynetConvolution16bNhwcDeptwise.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32f.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fDepthwise.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fInput.cpp">
<Filter>Sse41</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdSse41SynetMergedConvolution32fOutput.cpp">
<Filter>Sse41</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="Sse41">
Expand Down
2 changes: 2 additions & 0 deletions src/Simd/SimdBaseSynetMergedConvolution32f.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ namespace Simd
break;
}
_sizeB[1] = 0;
_bufH[1] = 0;
for (size_t i = 0; i < 2; ++i)
{
size_t dstC = AlignHiAny(p.conv[i].dstC, i == 1 ? _miC : 2 * _miC);
Expand Down Expand Up @@ -551,6 +552,7 @@ namespace Simd
break;
}
_bufH[1] = _bufH[0];
_bufH[0] = 0;
_sizeB[1] = 0;
for (size_t i = 0; i < 2; ++i)
{
Expand Down
81 changes: 81 additions & 0 deletions src/Simd/SimdSse41SynetMergedConvolution32f.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Simd Library (http://ermig1979.github.io/Simd).
*
* Copyright (c) 2011-2024 Yermalayeu Ihar.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "Simd/SimdSynetMergedConvolution32f.h"
#include "Simd/SimdSynetConvolution32fCommon.h"
#include "Simd/SimdUpdate.h"
#include "Simd/SimdCpu.h"

namespace Simd
{
#if defined(SIMD_SSE41_ENABLE) && defined(SIMD_SYNET_ENABLE)
namespace Sse41
{
// SSE4.1 constructor for the three-stage merged convolution.
// Forwards p to the Base implementation, then configures sizes and fills
// the three _convolution slots (0: input, 1: depthwise, 2: output) from
// p.conv[0..2] respectively.
SynetMergedConvolution32fCdc::SynetMergedConvolution32fCdc(const MergConvParam& p)
: Base::SynetMergedConvolution32fCdc(p)
{
// Size setup uses the Base L1/L2/L3 cache queries and F
// (presumably the SSE4.1 float vector width - confirm).
SetSize(Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F);
SetInput(p.conv[0], _convolution + 0);
// NOTE(review): the meaning of the boolean flag is not visible here; it is
// false in this class and in Dc, and true in Cd where depthwise is the final stage.
SetDepthwise(p.conv[1], false, _convolution + 1);
SetOutput(p.conv[2], _convolution + 2);
}

//-------------------------------------------------------------------------------------------------

// SSE4.1 constructor for the two-stage merged convolution whose stages are
// set up with SetInput (slot 0) followed by SetDepthwise (slot 1).
// Forwards p to the Base implementation before configuring sizes and slots.
SynetMergedConvolution32fCd::SynetMergedConvolution32fCd(const MergConvParam& p)
: Base::SynetMergedConvolution32fCd(p)
{
// Size setup uses the Base L1/L2/L3 cache queries and F
// (presumably the SSE4.1 float vector width - confirm).
SetSize(Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F);
// NOTE(review): stage parameters are read from _param here, whereas the Cdc
// and Dc constructors in this file read them from p. Presumably _param is the
// base class's stored copy of p, which would make the two equivalent - confirm.
SetInput(_param.conv[0], _convolution + 0);
// Depthwise is the last stage configured here and the flag is true
// (false in Cdc and Dc); its exact meaning is not visible in this file.
SetDepthwise(_param.conv[1], true, _convolution + 1);
}

//-------------------------------------------------------------------------------------------------

// SSE4.1 constructor for the two-stage merged convolution whose stages are
// set up with SetDepthwise (slot 0) followed by SetOutput (slot 1).
// Forwards p to the Base implementation before configuring sizes and slots.
SynetMergedConvolution32fDc::SynetMergedConvolution32fDc(const MergConvParam& p)
: Base::SynetMergedConvolution32fDc(p)
{
// Size setup uses the Base L1/L2/L3 cache queries and F
// (presumably the SSE4.1 float vector width - confirm).
SetSize(Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F);
// NOTE(review): the meaning of the boolean flag is not visible here; it is
// false whenever an output stage follows (here and in Cdc), true in Cd.
SetDepthwise(p.conv[0], false, _convolution + 0);
SetOutput(p.conv[1], _convolution + 1);
}

//-------------------------------------------------------------------------------------------------

void* SynetMergedConvolution32fInit(size_t batch, const SimdConvolutionParameters* convs, size_t count, SimdBool add)
{
MergConvParam param(batch, convs, count, add, SimdSynetCompatibilityDefault);
if (!param.Valid(SimdTensorData32f))
return NULL;
if (SynetMergedConvolution32fCdc::Preferable(param))
return new Sse41::SynetMergedConvolution32fCdc(param);
else if (SynetMergedConvolution32fCd::Preferable(param))
return new Sse41::SynetMergedConvolution32fCd(param);
else if (SynetMergedConvolution32fDc::Preferable(param))
return new Sse41::SynetMergedConvolution32fDc(param);
else
return new Base::SynetMergedConvolution32f(param);
}
}
#endif
}
Loading

0 comments on commit 690e34e

Please sign in to comment.