Skip to content

Commit 2b1f85c

Browse files
committed
Merge branch 'main' into maint-24.09
2 parents a0ce526 + 9ccf8ca commit 2b1f85c

File tree

9 files changed

+109
-57
lines changed

9 files changed

+109
-57
lines changed

CHANGELOG.md

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,27 @@
11
# Changelog
22

3-
## Changelog for 24.12
3+
## Changelog for 24.09
44

55
* ice: update driver to 1.14.9
6-
* st2110/20: add force numa option support on session level, see ST20_TX_FLAG_FORCE_NUMA/ST20_RX_FLAG_FORCE_NUMA
7-
* st2110/30: add force numa option support on session level, see ST30_TX_FLAG_FORCE_NUMA/ST30_RX_FLAG_FORCE_NUMA
6+
* st2110/20: add force NUMA option support on session level, see ST20_TX_FLAG_FORCE_NUMA/ST20_RX_FLAG_FORCE_NUMA
7+
* st2110/30: add force NUMA option support on session level, see ST30_TX_FLAG_FORCE_NUMA/ST30_RX_FLAG_FORCE_NUMA
8+
* ffmpeg: fix RX side dropping frames at the beginning of the session with st20/st22/st30.
9+
* st22: fix last frame dropping in TX. Ensure that last frame status changed to FREE.
10+
* dpdk: optimizing memory pool size.
11+
* manager: fix docker build.
12+
* ffmpeg: improve unicast initialization, reduce the number of dropped frames at the beginning of the session.
13+
* ixgbe: add driver support. Tested on 10-Gigabit X540-AT2 (1528) and Intel 10G X550T (1563).
14+
* sch/tasklet: fix the API so that the correct NUMA node is assigned when `mtl_sch_create` is used.
15+
* sch/tasklet: fix segfault when an lcore outside of the `RTE_MAX_LCORE` range is assigned.
16+
* app: add new video formats to sample app - YUV_420_16bit, YUV_422_8BIT, YUV_444_8bit, YUV_444_16bit.
17+
* RTP: fix checking for valid payload type.
18+
* st30: add `fifo_size` parameter parsing from user.
19+
* st41: add `St2110-41` format for 'Fast Metadata Framework' standard.
20+
* ffmpeg: add support of `44100` rate for `st30` format.
21+
* ffmpeg: add support for v7.0 version
22+
* st22: fix `socket_id` so that the correct NUMA node is assigned with pipeline when creating a new session.
23+
* GPU: add support for GPU direct buffers in ST2110/20. See `app/sample/gpu_direct` for usage.
24+
* ffmpeg: add support for GPU buffers.
825

926
## Changelog for 24.06
1027

app/src/rx_fastmetadata_app.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,7 @@ static int app_rx_fmd_compare_with_ref(struct st_app_rx_fmd_session* session, vo
4949
int frame_size) {
5050
int ret = -1;
5151
uint32_t last_zeros = 0; /* 4 bytes with 0 */
52-
uint32_t st41_ref_remaining_length =
53-
session ? session->st41_ref_end - session->st41_ref_cursor : 0;
52+
uint32_t st41_ref_remaining_length = session->st41_ref_end - session->st41_ref_cursor;
5453

5554
if (frame_size <= st41_ref_remaining_length) {
5655
ret = memcmp(frame, session->st41_ref_cursor, frame_size);
@@ -264,8 +263,7 @@ static int app_rx_fmd_init(struct st_app_context* ctx,
264263
s->st41_ref_fd = -1;
265264
if (fmd) {
266265
if (strcmp(fmd->info.fmd_url, "")) {
267-
snprintf(s->st41_ref_url, sizeof(s->st41_ref_url), "%s",
268-
fmd ? fmd->info.fmd_url : "null");
266+
snprintf(s->st41_ref_url, sizeof(s->st41_ref_url), "%s", fmd->info.fmd_url);
269267

270268
ret = app_rx_fmd_open_ref(s);
271269
if (ret < 0) {

app/src/tx_fastmetadata_app.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -480,8 +480,8 @@ static int app_tx_fmd_init(struct st_app_context* ctx,
480480
}
481481

482482
/* copying frame fields for RTP mode to function*/
483-
s->st41_dit = fmd->info.fmd_dit;
484-
s->st41_k_bit = fmd->info.fmd_k_bit;
483+
s->st41_dit = ops.fmd_dit;
484+
s->st41_k_bit = ops.fmd_k_bit;
485485

486486
s->handle = handle;
487487
snprintf(s->st41_source_url, sizeof(s->st41_source_url), "%s",

doc/configuration_guide.md

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,18 @@ Example `tx_1v_1a_1anc.json` file, find more example config file in [example con
5858
"ancillary_url": "./test.txt",
5959
"ancillary_fps": "p59"
6060
}
61+
],
62+
"fastmetadata": [
63+
{
64+
"replicas": 1,
65+
"start_port": 40000,
66+
"payload_type": 115,
67+
"type": "frame",
68+
"fastmetadata_data_item_type": 123456,
69+
"fastmetadata_k_bit": 1,
70+
"fastmetadata_url": "./test.txt",
71+
"fastmetadata_fps": "p59"
72+
}
6173
]
6274
}
6375
]
@@ -155,7 +167,27 @@ Items in each element of the "ancillary" array
155167

156168
**ancillary_url (string):** ancillary source
157169

158-
**ancillary_fps (string):** `"p59", "p50", "p29"`ancillary fps which should be aligned to video
170+
**ancillary_fps (string):** `"p59", "p50", "p29"` ancillary fps which should be aligned to video
171+
172+
#### fast metadata (array of fast metadata sessions)
173+
174+
Items in each element of the "fastmetadata" array
175+
176+
**replicas (int):** `1~max_num` the number of session copies
177+
178+
**type (string):** `"frame", "rtp"` app->lib data type
179+
180+
**start_port (int):** `0~65535` start udp port for copies of sessions
181+
182+
**payload_type (int):** `0~127` 7 bits payload type defined in RFC3550
183+
184+
**fastmetadata_data_item_type (int):** `0~4194303` (0x0 - 0x3fffff) 22 bits data item type
185+
186+
**fastmetadata_k_bit (int):** `0~1` 1 bit K-bit value
187+
188+
**fastmetadata_url (string):** fast metadata source
189+
190+
**fastmetadata_fps (string):** `"p59", "p50", "p29"` fast metadata fps which should be aligned to video
159191

160192
### RX Sessions (array of rx session groups)
161193

@@ -219,6 +251,22 @@ Items in each element of the "ancillary" array
219251

220252
**payload_type (int):** `0~127` 7 bits payload type defined in RFC3550
221253

254+
#### fast metadata (array of fast metadata sessions) for RX
255+
256+
Items in each element of the "fastmetadata" array
257+
258+
**replicas (int):** `1~max_num` the number of session copies
259+
260+
**start_port (int):** `0~65535` start udp port for copies of sessions
261+
262+
**payload_type (int):** `0~127` 7 bits payload type defined in RFC3550
263+
264+
**fastmetadata_data_item_type (int):** `0~4194303` (0x0 - 0x3fffff) 22 bits data item type - reference value (for testing the flow) - Optional setting
265+
266+
**fastmetadata_k_bit (int):** `0~1` 1 bit K-bit value - reference value (for testing the flow) - Optional setting
267+
268+
**fastmetadata_url (string):** fast metadata reference file (for testing the flow) - Optional setting
269+
222270
### Others
223271

224272
**shared_tx_queues (bool):** Whether to enable shared tx queues (optional). The number of queues on a NIC is limited; to support more sessions than available queues, enable this option to share queue resources between sessions.

doc/gpu.md

Lines changed: 13 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,55 @@
11
# GPU
22

3-
This is an experimental feature
4-
53
## General Info
64

7-
The idea to use Lever Zero API to allocation buffers directly in GPU to reduce amount of copy from kernel to user space.
8-
GPU <-> NIC.
9-
10-
This library provides a wrapper for Level Zero to init GPU and provide functions to allocate shared or device memory.
11-
12-
## Build
13-
14-
Use Cmake to build the project
15-
16-
## How to use it
5+
It's possible to create a memory buffer in GPU for the frames in st20 protocol.
6+
This is done by using [gpu direct](../gpu_direct/README.md) library.
177

18-
1) Use 'get_devices' to list drivers and devices index.
19-
2) Use 'init_gpu_device' to init gpu context
20-
3) Allocate memory with 'gpu_allocate_device_buffer' or 'gpu_allocate_shared_buffer'
21-
4) Use 'gpu_memcpy' and 'gpu_memset' for memcpy and memset operations
22-
5) Free space with gpu_free_buf.
23-
6) Free gpu context with free_gpu_context.
8+
Refer to [gpu direct st20 pipeline](../app/sample/gpu_direct) to see an example.
249

2510
## Build MTL GPU-Direct Library
11+
2612
Use Meson to build the GPU-Direct library specifically.
2713

28-
``` bash
14+
```bash
2915
cd <mtl>/gpu_direct
3016
meson setup build
3117
sudo meson install -C build
18+
3219
# check package installed
3320
pkg-config --libs mtl_gpu_direct
3421

3522
# build the mtl library
3623
./build.sh
3724
```
3825

39-
``` bash
4026
Run TX Sample App
41-
Prepare a file (test.yuv) of 1920x1080 UYVY frames to send. You can refer to run.md for more details.
27+
Prepare a file (test.yuv) of 1920x1080 UYVY frames to send. You can refer to [run guide](../doc/run.md) for more details.
4228

29+
```bash
4330
./build/app/GpuDirectVideoTxMultiSample 192.168.99.110 20000 test.yuv
31+
```
32+
4433
Run RX Sample App
4534
You need the SDL library to display the received frame.
46-
```
4735

4836
``` bash
4937
./build/app/GpuDirectVideoRxMultiSample 192.168.99.111 192.168.99.110 20000
5038
```
5139

52-
5340
## How to enable it in MTL
5441

5542
Currently, only the ST20P receive frame mode supports VRAM frame allocation.
43+
5644
To enable this feature, use the following flag while initializing the session:
5745
`ST20P_RX_FLAG_USE_GPU_DIRECT_FRAMEBUFFERS`
5846

5947
This setting instructs MTL to allocate frames directly in VRAM.
6048

61-
Additionally, you must initialize the GPU device in your application using this library
49+
Additionally, you must initialize the GPU device in your application using the gpu direct library by calling the
6250
`init_gpu_device` function.
6351

64-
6552
Pass the address of the device with the gpu_context parameter:
66-
`gpu_context` to the st20p rx flags during session initalization.
53+
`gpu_context` to the st20p rx flags during session initialization.
6754

6855
**Warning:** Direct memory access functionality is disabled when using this flag. Memory allocated in VRAM cannot be accessed directly using dpdk API.
69-
70-
### Links
71-
72-
- [Level Zero Intro](https://www.intel.com/content/www/us/en/developer/articles/technical/using-oneapi-level-zero-interface.html)

ecosystem/ffmpeg_plugin/README.md

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -152,38 +152,41 @@ ffmpeg -stream_loop -1 -video_size 1920x1080 -f rawvideo -pix_fmt yuv422p10le -i
152152
Reading a st2110-30 stream(pcm24,1ms packet time,2 channels) on "239.168.85.20:30000" with payload_type 111 and encoded to a wav file:
153153

154154
```bash
155-
ffmpeg -p_port 0000:af:01.0 -p_sip 192.168.96.2 -p_rx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -pcm_fmt pcm24 -at 1ms -ac 2 -f mtl_st30p -i "0" dump.wav -y
155+
ffmpeg -p_port 0000:af:01.0 -p_sip 192.168.96.2 -p_rx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -pcm_fmt pcm24 -ptime 1ms -channels 2 -f mtl_st30p -i "0" dump.wav -y
156156
```
157157

158158
### 4.2 St30p output
159159

160160
Reading from a wav file and sending a st2110-30 stream(pcm24,1ms packet time,2 channels) on "239.168.85.20:30000" with payload_type 111:
161161

162162
```bash
163-
ffmpeg -stream_loop -1 -i test.wav -p_port 0000:af:01.1 -p_sip 192.168.96.3 -p_tx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -at 1ms -f mtl_st30p -
163+
ffmpeg -stream_loop -1 -i test.wav -p_port 0000:af:01.1 -p_sip 192.168.96.3 -p_tx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -ptime 1ms -f mtl_st30p -
164164
```
165165

166166
### 4.3 St30p pcm16 example
167167

168168
For pcm16 audio, use `mtl_st30p_pcm16` muxer, set `pcm_fmt` to `pcm16` for demuxer.
169169

170170
```bash
171-
ffmpeg -stream_loop -1 -i test.wav -p_port 0000:af:01.1 -p_sip 192.168.96.3 -p_tx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -at 1ms -f mtl_st30p_pcm16 -
171+
ffmpeg -stream_loop -1 -i test.wav -p_port 0000:af:01.1 -p_sip 192.168.96.3 -p_tx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -ptime 1ms -f mtl_st30p_pcm16 -
172172

173-
ffmpeg -p_port 0000:af:01.0 -p_sip 192.168.96.2 -p_rx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -pcm_fmt pcm16 -at 1ms -ac 2 -f mtl_st30p -i "0" dump_pcm16.wav -y
173+
ffmpeg -p_port 0000:af:01.0 -p_sip 192.168.96.2 -p_rx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -pcm_fmt pcm16 -ptime 1ms -channels 2 -f mtl_st30p -i "0" dump_pcm16.wav -y
174174
```
175175

176-
### Enabling experimental MTL_GPU_DIRECT in FFmpeg with ST20p Support
176+
## 5. St20 GPU direct guide
177177

178178
The MTL_GPU_DIRECT experimental feature aims at enhancing FFmpeg's performance by allowing direct access to GPU memory, which can be particularly beneficial when working with high-throughput video streams such as those handled by the MTL ST20 codec plugin.
179179

180-
#### Building FFmpeg with MTL_GPU_DIRECT Enabled
180+
### 5.1 Enabling experimental MTL_GPU_DIRECT in FFmpeg with ST20p Support
181+
181182
To take advantage of the MTL_GPU_DIRECT feature FFmpeg has to be built with this option enabled. Here’s how to do it:
182183

183184
```bash
184185
./configure --enable-shared --disable-static --enable-nonfree --enable-pic --enable-gpl --enable-libopenh264 --enable-encoder=libopenh264 --enable-mtl --extra-cflags="-DMTL_GPU_DIRECT_ENABLED"
185186
```
187+
186188
or use
189+
187190
```bash
188191
./build_ffmpeg_plugin.sh -g
189192
```
@@ -195,17 +198,20 @@ enabled gpu_direct:
195198
./ffmpeg -p_port 0000:af:01.0 -p_sip 192.168.96.2 -p_rx_ip 239.168.85.20 -udp_port 20000 -payload_type 112 -fps 59.94 -pix_fmt yuv422p10le -video_size 1920x1080 -gpu_direct 1 -gpu_driver 0 -gpu_device 0 -f mtl_st20p -i "k" -f rawvideo /dev/null -y
196199
```
197200

198-
#### Additional Notes
201+
### 5.2 Additional Notes
202+
199203
**GPU Direct Flag:** When compiling FFmpeg with the MTL_GPU_DIRECT feature enabled, ensure that your system's GPU drivers and hardware support direct GPU memory access.
200204
GPU device IDs and GPU driver IDs are printed during initialization.
201205

202206
**Options:**
207+
203208
1. `-gpu_device`
204209
1. `-gpu_driver`
205210

206211
Both default to 0, but if your device doesn't initialize, adjust it using the information printed during initialization.
207212

208213
**Example:**
214+
209215
```plaintext
210216
Drivers count: 1
211217
Driver: 0: Device: 0: Name: Intel(R) Data Center GPU Flex 170, Type: 1, VendorID: 8086, DeviceID: 22208

ecosystem/ffmpeg_plugin/mtl_st30p_rx.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ static int mtl_st30p_read_header(AVFormatContext* ctx) {
119119
ops_rx.ptime = s->ptime;
120120
ops_rx.channel = s->channels;
121121
ret = mtl_parse_st30_sample_rate(&ops_rx.sampling, s->sample_rate);
122-
if (!ret) {
122+
if (ret) {
123123
err(ctx, "%s, invalid sample_rate: %d\n", __func__, s->sample_rate);
124124
return ret;
125125
}
@@ -266,16 +266,16 @@ static const AVOption mtl_st30p_rx_options[] = {
266266
0,
267267
60,
268268
DEC},
269-
{"ar",
270-
"audio sampling rate",
269+
{"sample_rate",
270+
"audio sample rate",
271271
OFFSET(sample_rate),
272272
AV_OPT_TYPE_INT,
273273
{.i64 = 48000},
274274
1,
275275
INT_MAX,
276276
DEC},
277-
{"ac",
278-
"audio channel",
277+
{"channels",
278+
"number of audio channels",
279279
OFFSET(channels),
280280
AV_OPT_TYPE_INT,
281281
{.i64 = 2},
@@ -288,7 +288,7 @@ static const AVOption mtl_st30p_rx_options[] = {
288288
AV_OPT_TYPE_STRING,
289289
{.str = NULL},
290290
.flags = DEC},
291-
{"at",
291+
{"ptime",
292292
"audio packet time",
293293
OFFSET(ptime_str),
294294
AV_OPT_TYPE_STRING,

ecosystem/ffmpeg_plugin/mtl_st30p_tx.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ static int mtl_st30p_write_header(AVFormatContext* ctx) {
111111
ops_tx.channel = codecpar->ch_layout.nb_channels;
112112
#endif
113113
ret = mtl_parse_st30_sample_rate(&ops_tx.sampling, codecpar->sample_rate);
114-
if (!ret) {
114+
if (ret) {
115115
err(ctx, "%s, unknown sample_rate %d\n", __func__, codecpar->sample_rate);
116116
return ret;
117117
}
@@ -230,7 +230,7 @@ static const AVOption mtl_st30p_tx_options[] = {
230230
3,
231231
8000,
232232
ENC},
233-
{"at",
233+
{"ptime",
234234
"audio packet time",
235235
OFFSET(ptime_str),
236236
AV_OPT_TYPE_STRING,

lib/src/st2110/st_rx_video_session.c

100644100755
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -457,11 +457,11 @@ static int rv_alloc_frames(struct mtl_main_impl* impl,
457457
} else {
458458
#ifdef MTL_GPU_DIRECT_ENABLED
459459
if (rv_framebuffer_in_gpu_direct_vram(s)) {
460-
info("%s: using gpu direct feature.\n", __func__);
460+
info("%s: using GPU direct feature.\n", __func__);
461461
GpuContext* gpu = s->ops.gpu_context;
462462
ret = gpu_allocate_shared_buffer(gpu, &frame, size);
463463
if (ret < 0) {
464-
err("%s: failed to allocate gpu memory on vram. ret: %d\n", __func__, ret);
464+
err("%s: failed to allocate GPU memory on vram. ret: %d\n", __func__, ret);
465465
return -ENOMEM;
466466
}
467467
} else

0 commit comments

Comments
 (0)