Skip to content

Commit 810e795

Browse files
committed
Optimized SRTP stream lookup with SSE2.
Stream lookup by SSRC is now performed using SSE2 intrinsics, which is considerably faster when there are many streams in the list. Although the lookup still has linear complexity, its absolute times are reduced and, with tens to hundreds of elements, are lower than or comparable to those of a typical rb-tree equivalent.

Expected stream lookup performance of the scalar array-based implementation and its SSE2 version, compared to the list-based implementation that was used previously:

SSRCs    speedup (scalar)    speedup (SSE2)
    1         0.39x               0.22x
    3         0.57x               0.23x
    5         0.69x               0.62x
   10         0.77x               1.43x
   20         0.86x               2.38x
   30         0.87x               3.44x
   50         1.13x               6.21x
  100         1.25x               8.51x
  200         1.30x               9.83x

Performance tested on an Intel Core i7 2600K CPU.
1 parent 372491b commit 810e795

File tree

3 files changed

+176
-16
lines changed

3 files changed

+176
-16
lines changed

srtp/stream_list.c

+6-16
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@
5252
#include "err.h"
5353
#include "alloc.h"
5454

55+
#if defined(__SSE2__)
56+
#include "stream_list_sse2.h"
57+
#else
58+
#include "stream_list_generic.h"
59+
#endif
60+
5561
/*
5662
* Initializes an empty list of streams
5763
*/
@@ -63,22 +69,6 @@ void srtp_stream_list_init(srtp_stream_list_t *streams)
6369
streams->capacity = 0u;
6470
}
6571

66-
/*
67-
* Returns an index of the stream corresponding to ssrc,
68-
* or >= streams->size if no stream exists for that ssrc.
69-
*/
70-
uint32_t srtp_stream_list_find(const srtp_stream_list_t *streams, uint32_t ssrc)
71-
{
72-
/* walk down list until ssrc is found */
73-
uint32_t pos = 0u, n = streams->size;
74-
for (; pos < n; ++pos) {
75-
if (streams->ssrcs[pos] == ssrc)
76-
break;
77-
}
78-
79-
return pos;
80-
}
81-
8272
/*
8373
* Reserves storage to be able to store at least the specified number
8474
* of elements.

srtp/stream_list_generic.h

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* stream_list_generic.h
3+
*
4+
* SRTP stream list generic implementation
5+
*
6+
* Andrey Semashev
7+
*/
8+
/*
9+
*
10+
* Copyright (c) 2022, Cisco Systems, Inc.
11+
* All rights reserved.
12+
*
13+
* Redistribution and use in source and binary forms, with or without
14+
* modification, are permitted provided that the following conditions
15+
* are met:
16+
*
17+
* Redistributions of source code must retain the above copyright
18+
* notice, this list of conditions and the following disclaimer.
19+
*
20+
* Redistributions in binary form must reproduce the above
21+
* copyright notice, this list of conditions and the following
22+
* disclaimer in the documentation and/or other materials provided
23+
* with the distribution.
24+
*
25+
* Neither the name of the Cisco Systems, Inc. nor the names of its
26+
* contributors may be used to endorse or promote products derived
27+
* from this software without specific prior written permission.
28+
*
29+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
32+
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
33+
* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34+
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
35+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
36+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37+
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
38+
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
40+
* OF THE POSSIBILITY OF SUCH DAMAGE.
41+
*
42+
*/
43+
44+
/* NOTE: This file is intended to be included only once, in stream_list.c */
45+
46+
/*
47+
* Returns an index of the stream corresponding to ssrc,
48+
* or >= streams->size if no stream exists for that ssrc.
49+
*/
50+
uint32_t srtp_stream_list_find(const srtp_stream_list_t *streams, uint32_t ssrc)
51+
{
52+
/* walk down list until ssrc is found */
53+
uint32_t pos = 0u, n = streams->size;
54+
for (; pos < n; ++pos) {
55+
if (streams->ssrcs[pos] == ssrc)
56+
break;
57+
}
58+
59+
return pos;
60+
}

srtp/stream_list_sse2.h

+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/*
2+
* stream_list_sse2.h
3+
*
4+
* SRTP stream list SSE2 implementation
5+
*
6+
* Andrey Semashev
7+
*/
8+
/*
9+
*
10+
* Copyright (c) 2022, Cisco Systems, Inc.
11+
* All rights reserved.
12+
*
13+
* Redistribution and use in source and binary forms, with or without
14+
* modification, are permitted provided that the following conditions
15+
* are met:
16+
*
17+
* Redistributions of source code must retain the above copyright
18+
* notice, this list of conditions and the following disclaimer.
19+
*
20+
* Redistributions in binary form must reproduce the above
21+
* copyright notice, this list of conditions and the following
22+
* disclaimer in the documentation and/or other materials provided
23+
* with the distribution.
24+
*
25+
* Neither the name of the Cisco Systems, Inc. nor the names of its
26+
* contributors may be used to endorse or promote products derived
27+
* from this software without specific prior written permission.
28+
*
29+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
32+
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
33+
* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34+
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
35+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
36+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37+
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
38+
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
40+
* OF THE POSSIBILITY OF SUCH DAMAGE.
41+
*
42+
*/
43+
44+
/* NOTE: This file is intended to be included only once, in stream_list.c */
45+
46+
#include <emmintrin.h>
47+
#if defined(_MSC_VER)
48+
#include <intrin.h>
49+
#endif
50+
51+
/*
52+
* Returns an index of the stream corresponding to ssrc,
53+
* or >= streams->size if no stream exists for that ssrc.
54+
*/
55+
uint32_t srtp_stream_list_find(const srtp_stream_list_t *streams, uint32_t ssrc)
56+
{
57+
const uint32_t *const ssrcs = streams->ssrcs;
58+
const __m128i mm_ssrc = _mm_set1_epi32(ssrc);
59+
uint32_t pos = 0u, n = (streams->size + 7u) & ~(uint32_t)(7u);
60+
for (uint32_t m = n & ~(uint32_t)(15u); pos < m; pos += 16u) {
61+
__m128i mm1 = _mm_loadu_si128((const __m128i *)(ssrcs + pos));
62+
__m128i mm2 = _mm_loadu_si128((const __m128i *)(ssrcs + pos + 4u));
63+
__m128i mm3 = _mm_loadu_si128((const __m128i *)(ssrcs + pos + 8u));
64+
__m128i mm4 = _mm_loadu_si128((const __m128i *)(ssrcs + pos + 12u));
65+
mm1 = _mm_cmpeq_epi32(mm1, mm_ssrc);
66+
mm2 = _mm_cmpeq_epi32(mm2, mm_ssrc);
67+
mm3 = _mm_cmpeq_epi32(mm3, mm_ssrc);
68+
mm4 = _mm_cmpeq_epi32(mm4, mm_ssrc);
69+
mm1 = _mm_packs_epi32(mm1, mm2);
70+
mm3 = _mm_packs_epi32(mm3, mm4);
71+
mm1 = _mm_packs_epi16(mm1, mm3);
72+
uint32_t mask = _mm_movemask_epi8(mm1);
73+
if (mask) {
74+
#if defined(_MSC_VER)
75+
unsigned long bit_pos;
76+
_BitScanForward(&bit_pos, mask);
77+
pos += bit_pos;
78+
#else
79+
pos += __builtin_ctz(mask);
80+
#endif
81+
82+
goto done;
83+
}
84+
}
85+
86+
if (pos < n) {
87+
__m128i mm1 = _mm_loadu_si128((const __m128i *)(ssrcs + pos));
88+
__m128i mm2 = _mm_loadu_si128((const __m128i *)(ssrcs + pos + 4u));
89+
mm1 = _mm_cmpeq_epi32(mm1, mm_ssrc);
90+
mm2 = _mm_cmpeq_epi32(mm2, mm_ssrc);
91+
mm1 = _mm_packs_epi32(mm1, mm2);
92+
93+
uint32_t mask = _mm_movemask_epi8(mm1);
94+
if (mask) {
95+
#if defined(_MSC_VER)
96+
unsigned long bit_pos;
97+
_BitScanForward(&bit_pos, mask);
98+
pos += bit_pos / 2u;
99+
#else
100+
pos += __builtin_ctz(mask) / 2u;
101+
#endif
102+
goto done;
103+
}
104+
105+
pos += 8u;
106+
}
107+
108+
done:
109+
return pos;
110+
}

0 commit comments

Comments
 (0)