Merge branch 'main' into maint-24.09

PanKaker · PanKaker · commit 2b1f85c3ea64 · 2024-10-03T13:04:31.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,10 +1,27 @@
 # Changelog
 
-## Changelog for 24.12
+## Changelog for 24.09
 
 * ice: update driver to 1.14.9
-* st2110/20: add force numa option support on session level, see ST20_TX_FLAG_FORCE_NUMA/ST20_RX_FLAG_FORCE_NUMA
-* st2110/30: add force numa option support on session level, see ST30_TX_FLAG_FORCE_NUMA/ST30_RX_FLAG_FORCE_NUMA
+* st2110/20: add force NUMA option support on session level, see ST20_TX_FLAG_FORCE_NUMA/ST20_RX_FLAG_FORCE_NUMA
+* st2110/30: add force NUMA option support on session level, see ST30_TX_FLAG_FORCE_NUMA/ST30_RX_FLAG_FORCE_NUMA
+* ffmpeg: fix RX side dropping frames at the beginning of the session with st20/st22/st30.
+* st22: fix dropping of the last frame in TX. Ensure that the last frame's status is changed to FREE.
+* dpdk: optimize memory pool size.
+* manager: fix docker build.
+* ffmpeg: improve unicast initialization, reduce the number of frames dropped at the beginning of the session.
+* ixgbe: add driver support. Tested on 10-Gigabit X540-AT2 (1528) and Intel 10G X550T (1563).
+* sch/tasklet: fix API so that the correct NUMA node is assigned when `mtl_sch_create` is used.
+* sch/tasklet: fix segfault when an lcore outside the `RTE_MAX_LCORE` range is assigned.
+* app: add new video formats to sample app - YUV_420_16bit, YUV_422_8BIT, YUV_444_8bit, YUV_444_16bit.
+* RTP: fix checking for valid payload type.
+* st30: add `fifo_size` parameter parsing from user.
+* st41: add `St2110-41` format for 'Fast Metadata Framework' standard.
+* ffmpeg: add support of `44100` rate for `st30` format.
+* ffmpeg: add support for version 7.0.
+* st22: fix NUMA assignment so that the correct `socket_id` is used with pipeline when creating a new session.
+* GPU: add support for GPU direct buffers in ST2110/20. See `app/sample/gpu_direct` for usage.
+* ffmpeg: add support for GPU buffers.
 
 ## Changelog for 24.06
 
diff --git a/app/src/rx_fastmetadata_app.c b/app/src/rx_fastmetadata_app.c
@@ -49,8 +49,7 @@ static int app_rx_fmd_compare_with_ref(struct st_app_rx_fmd_session* session, vo
                                        int frame_size) {
   int ret = -1;
   uint32_t last_zeros = 0; /* 4 bytes with 0 */
-  uint32_t st41_ref_remaining_length =
-      session ? session->st41_ref_end - session->st41_ref_cursor : 0;
+  uint32_t st41_ref_remaining_length = session->st41_ref_end - session->st41_ref_cursor;
 
   if (frame_size <= st41_ref_remaining_length) {
     ret = memcmp(frame, session->st41_ref_cursor, frame_size);
@@ -264,8 +263,7 @@ static int app_rx_fmd_init(struct st_app_context* ctx,
   s->st41_ref_fd = -1;
   if (fmd) {
     if (strcmp(fmd->info.fmd_url, "")) {
-      snprintf(s->st41_ref_url, sizeof(s->st41_ref_url), "%s",
-               fmd ? fmd->info.fmd_url : "null");
+      snprintf(s->st41_ref_url, sizeof(s->st41_ref_url), "%s", fmd->info.fmd_url);
 
       ret = app_rx_fmd_open_ref(s);
       if (ret < 0) {
diff --git a/app/src/tx_fastmetadata_app.c b/app/src/tx_fastmetadata_app.c
@@ -480,8 +480,8 @@ static int app_tx_fmd_init(struct st_app_context* ctx,
   }
 
   /* copying frame fields for RTP mode to function*/
-  s->st41_dit = fmd->info.fmd_dit;
-  s->st41_k_bit = fmd->info.fmd_k_bit;
+  s->st41_dit = ops.fmd_dit;
+  s->st41_k_bit = ops.fmd_k_bit;
 
   s->handle = handle;
   snprintf(s->st41_source_url, sizeof(s->st41_source_url), "%s",
diff --git a/doc/configuration_guide.md b/doc/configuration_guide.md
@@ -58,6 +58,18 @@ Example `tx_1v_1a_1anc.json` file, find more example config file in [example con
                     "ancillary_url": "./test.txt",
                     "ancillary_fps": "p59"
                 }
+            ],
+            "fastmetadata": [
+                {
+                    "replicas": 1,
+                    "start_port": 40000,
+                    "payload_type": 115,
+                    "type": "frame",
+                    "fastmetadata_data_item_type": 123456,
+                    "fastmetadata_k_bit": 1,
+                    "fastmetadata_url": "./test.txt",
+                    "fastmetadata_fps": "p59"
+                }
             ]
         }
     ]
@@ -155,7 +167,27 @@ Items in each element of the "ancillary" array
 
 ​ **ancillary_url (string):** ancillary source
 
-   **ancillary_fps (string):** `"p59", "p50", "p29"`ancillary fps which should be aligned to video
+​ **ancillary_fps (string):** `"p59", "p50", "p29"` ancillary fps which should be aligned to video
+
+#### fast metadata (array of fast metadata sessions)
+
+Items in each element of the "fastmetadata" array
+
+​ **replicas (int):** `1~max_num` the number of session copies
+
+​ **type (string):** `"frame", "rtp"` app->lib data type
+
+​ **start_port (int):** `0~65535` start udp port for copies of sessions
+
+​ **payload_type (int):** `0~127` 7 bits payload type define in RFC3550
+
+​ **fastmetadata_data_item_type (int):** `0~4194303` (0x0 - 0x3fffff) 22-bit data item type
+
+​ **fastmetadata_k_bit (int):** `0~1` 1 bit K-bit value
+
+​ **fastmetadata_url (string):** fast metadata source
+
+ **fastmetadata_fps (string):** `"p59", "p50", "p29"` fast metadata fps which should be aligned to video
 
 ### RX Sessions (array of rx session groups)
 
@@ -219,6 +251,22 @@ Items in each element of the "ancillary" array
 
 ​ **payload_type (int):** `0~127` 7 bits payload type define in RFC3550
 
+#### fast metadata (array of fast metadata sessions) for RX
+
+Items in each element of the "fastmetadata" array
+
+​ **replicas (int):** `1~max_num` the number of session copies
+
+​ **start_port (int):** `0~65535` start udp port for copies of sessions
+
+​ **payload_type (int):** `0~127` 7 bits payload type define in RFC3550
+
+​ **fastmetadata_data_item_type (int):** `0~4194303` (0x0 - 0x3fffff) 22-bit data item type - reference value (for testing the flow) - Optional setting
+
+​ **fastmetadata_k_bit (int):** `0~1` 1 bit K-bit value - reference value (for testing the flow) - Optional setting
+
+​ **fastmetadata_url (string):** fast metadata reference file (for testing the flow) - Optional setting
+
 ### Others
 
  **shared_tx_queues (bool):** If enable the shared tx queues or not, (optional). The queue number is limited for NIC, to support sessions more than queue number, enable this option to share queue resource between sessions.
diff --git a/doc/gpu.md b/doc/gpu.md
@@ -1,72 +1,55 @@
 # GPU
 
-This is an experimental feature
-
 ## General Info
 
-The idea to use Lever Zero API to allocation buffers directly in GPU to reduce amount of copy from kernel to user space.
-GPU <-> NIC.
-
-This library provides a wrapper for Level Zero to init GPU and provide functions to allocate shared or device memory.
-
-## Build
-
-Use Cmake to build the project
-
-## How to use it
+It's possible to create a memory buffer in GPU for the frames in st20 protocol.
+This is done by using [gpu direct](../gpu_direct/README.md) library.
 
-1) Use 'get_devices' to list drivers and devices index.
-2) Use 'init_gpu_device' to init gpu context
-3) Allocate memory with  'gpu_allocate_device_buffer' or 'gpu_allocate_shared_buffer'
-4) Use 'gpu_memcpy' and 'gpu_memset' for memcpy and memset operations
-5) Free space with gpu_free_buf.
-6) Free gpu context with free_gpu_context.
+Refer to [gpu direct st20 pipeline](../app/sample/gpu_direct) to see an example.
 
 ## Build MTL GPU-Direct Library
+
 Use Meson to build the GPU-Direct library specifically.
 
-``` bash
+```bash
 cd <mtl>/gpu_direct
 meson setup build
 sudo meson install -C build
+
 # check package installed
 pkg-config --libs mtl_gpu_direct
 
 # build the mtl library
 ./build.sh
 ```
 
-``` bash
 Run TX Sample App
-Prepare a file (test.yuv) of 1920x1080 UYVY frames to send. You can refer to run.md for more details.
+Prepare a file (test.yuv) of 1920x1080 UYVY frames to send. You can refer to [run guide](../doc/run.md) for more details.
 
+```bash
 ./build/app/GpuDirectVideoTxMultiSample 192.168.99.110 20000 test.yuv
+```
+
 Run RX Sample App
 You need the SDL library to display the received frame.
-```
 
 ``` bash
 ./build/app/GpuDirectVideoRxMultiSample 192.168.99.111 192.168.99.110 20000
 ```
 
-
 ## How to enable it in MTL
 
 Currently, only the ST20P receive frame mode supports VRAM frame allocation.
+
 To enable this feature, use the following flag while initializing the session:  
 `ST20P_RX_FLAG_USE_GPU_DIRECT_FRAMEBUFFERS`
 
 This setting instructs MTL to allocate frames directly in VRAM.
 
-Additionally, you must initialize the GPU device in your application using this library  
+Additionally, you must initialize the GPU device in your application using the gpu direct library's
 `init_gpu_device` function.
 
-
 Pass the address of the device with the gpu_context parameter:  
-`gpu_context` to the st20p rx flags during session initalization.
+`gpu_context` to the st20p rx flags during session initialization.
 
 **Warning:** Direct memory access functionality is disabled when using this flag. Memory allocated in VRAM cannot be accessed directly using dpdk API.
-
-### Links
-
-- [Level Zero Intro](https://www.intel.com/content/www/us/en/developer/articles/technical/using-oneapi-level-zero-interface.html)
diff --git a/ecosystem/ffmpeg_plugin/README.md b/ecosystem/ffmpeg_plugin/README.md
@@ -152,38 +152,41 @@ ffmpeg -stream_loop -1 -video_size 1920x1080 -f rawvideo -pix_fmt yuv422p10le -i
 Reading a st2110-30 stream(pcm24,1ms packet time,2 channels) on "239.168.85.20:30000" with payload_type 111 and encoded to a wav file:
 
 ```bash
-ffmpeg -p_port 0000:af:01.0 -p_sip 192.168.96.2 -p_rx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -pcm_fmt pcm24 -at 1ms -ac 2 -f mtl_st30p -i "0" dump.wav -y
+ffmpeg -p_port 0000:af:01.0 -p_sip 192.168.96.2 -p_rx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -pcm_fmt pcm24 -ptime 1ms -channels 2 -f mtl_st30p -i "0" dump.wav -y
 ```
 
 ### 4.2 St30p output
 
 Reading from a wav file and sending a st2110-30 stream(pcm24,1ms packet time,2 channels) on "239.168.85.20:30000" with payload_type 111:
 
 ```bash
-ffmpeg -stream_loop -1 -i test.wav -p_port 0000:af:01.1 -p_sip 192.168.96.3 -p_tx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -at 1ms -f mtl_st30p -
+ffmpeg -stream_loop -1 -i test.wav -p_port 0000:af:01.1 -p_sip 192.168.96.3 -p_tx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -ptime 1ms -f mtl_st30p -
 ```
 
 ### 4.3 St30p pcm16 example
 
 For pcm16 audio, use `mtl_st30p_pcm16` muxer, set `pcm_fmt` to `pcm16` for demuxer.
 
 ```bash
-ffmpeg -stream_loop -1 -i test.wav -p_port 0000:af:01.1 -p_sip 192.168.96.3 -p_tx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -at 1ms -f mtl_st30p_pcm16 -
+ffmpeg -stream_loop -1 -i test.wav -p_port 0000:af:01.1 -p_sip 192.168.96.3 -p_tx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -ptime 1ms -f mtl_st30p_pcm16 -
 
-ffmpeg -p_port 0000:af:01.0 -p_sip 192.168.96.2 -p_rx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -pcm_fmt pcm16 -at 1ms -ac 2 -f mtl_st30p -i "0" dump_pcm16.wav -y
+ffmpeg -p_port 0000:af:01.0 -p_sip 192.168.96.2 -p_rx_ip 239.168.85.20 -udp_port 30000 -payload_type 111 -pcm_fmt pcm16 -ptime 1ms -channels 2 -f mtl_st30p -i "0" dump_pcm16.wav -y
 ```
 
-### Enabling experimental MTL_GPU_DIRECT in FFmpeg with ST20p Support
+## 5. St20 GPU direct guide
 
 The MTL_GPU_DIRECT experimental feature aims at enhancing FFmpeg's performance by allowing direct access to GPU memory, which can be particularly beneficial when working with high-throughput video streams such as those handled by the MTL ST20 codec plugin.
 
-#### Building FFmpeg with MTL_GPU_DIRECT Enabled
+### 5.1 Enabling experimental MTL_GPU_DIRECT in FFmpeg with ST20p Support
+
 To take advantage of the MTL_GPU_DIRECT feature FFmpeg has to be built with this option enabled. Here’s how to do it:
 
 ```bash
 ./configure --enable-shared --disable-static --enable-nonfree --enable-pic --enable-gpl --enable-libopenh264 --enable-encoder=libopenh264 --enable-mtl --extra-cflags="-DMTL_GPU_DIRECT_ENABLED"
 ```
+
 or use
+
 ```bash
 ./build_ffmpeg_plugin.sh -g
 ```
@@ -195,17 +198,20 @@ enabled gpu_direct:
 ./ffmpeg -p_port 0000:af:01.0 -p_sip 192.168.96.2 -p_rx_ip 239.168.85.20 -udp_port 20000 -payload_type 112 -fps 59.94 -pix_fmt yuv422p10le -video_size 1920x1080 -gpu_direct 1 -gpu_driver 0 -gpu_device 0 -f mtl_st20p -i "k" -f rawvideo /dev/null -y
 ```
 
-#### Additional Notes
+### 5.2 Additional Notes
+
 **GPU Direct Flag:** When compiling FFmpeg with the MTL_GPU_DIRECT feature enabled, ensure that your system's GPU drivers and hardware support direct GPU memory access.  
 GPU device IDs and GPU driver IDs are printed during initialization.
 
 **Options:**
+
 1. `-gpu_device`
 1. `-gpu_driver`
 
 Both default to 0, but if your device doesn't initialize, adjust it using the information printed during initialization.
 
 **Example:**
+
 ```plaintext
 Drivers count: 1
 Driver: 0: Device: 0: Name: Intel(R) Data Center GPU Flex 170, Type: 1, VendorID: 8086, DeviceID: 22208
diff --git a/ecosystem/ffmpeg_plugin/mtl_st30p_rx.c b/ecosystem/ffmpeg_plugin/mtl_st30p_rx.c
@@ -119,7 +119,7 @@ static int mtl_st30p_read_header(AVFormatContext* ctx) {
   ops_rx.ptime = s->ptime;
   ops_rx.channel = s->channels;
   ret = mtl_parse_st30_sample_rate(&ops_rx.sampling, s->sample_rate);
-  if (!ret) {
+  if (ret) {
     err(ctx, "%s, invalid sample_rate: %d\n", __func__, s->sample_rate);
     return ret;
   }
@@ -266,16 +266,16 @@ static const AVOption mtl_st30p_rx_options[] = {
      0,
      60,
      DEC},
-    {"ar",
-     "audio sampling rate",
+    {"sample_rate",
+     "audio sample rate",
      OFFSET(sample_rate),
      AV_OPT_TYPE_INT,
      {.i64 = 48000},
      1,
      INT_MAX,
      DEC},
-    {"ac",
-     "audio channel",
+    {"channels",
+     "number of audio channels",
      OFFSET(channels),
      AV_OPT_TYPE_INT,
      {.i64 = 2},
@@ -288,7 +288,7 @@ static const AVOption mtl_st30p_rx_options[] = {
      AV_OPT_TYPE_STRING,
      {.str = NULL},
      .flags = DEC},
-    {"at",
+    {"ptime",
      "audio packet time",
      OFFSET(ptime_str),
      AV_OPT_TYPE_STRING,
diff --git a/ecosystem/ffmpeg_plugin/mtl_st30p_tx.c b/ecosystem/ffmpeg_plugin/mtl_st30p_tx.c
@@ -111,7 +111,7 @@ static int mtl_st30p_write_header(AVFormatContext* ctx) {
   ops_tx.channel = codecpar->ch_layout.nb_channels;
 #endif
   ret = mtl_parse_st30_sample_rate(&ops_tx.sampling, codecpar->sample_rate);
-  if (!ret) {
+  if (ret) {
     err(ctx, "%s, unknown sample_rate %d\n", __func__, codecpar->sample_rate);
     return ret;
   }
@@ -230,7 +230,7 @@ static const AVOption mtl_st30p_tx_options[] = {
      3,
      8000,
      ENC},
-    {"at",
+    {"ptime",
      "audio packet time",
      OFFSET(ptime_str),
      AV_OPT_TYPE_STRING,
diff --git a/lib/src/st2110/st_rx_video_session.c b/lib/src/st2110/st_rx_video_session.c
@@ -457,11 +457,11 @@ static int rv_alloc_frames(struct mtl_main_impl* impl,
     } else {
 #ifdef MTL_GPU_DIRECT_ENABLED
       if (rv_framebuffer_in_gpu_direct_vram(s)) {
-        info("%s: using gpu direct feature.\n", __func__);
+        info("%s: using GPU direct feature.\n", __func__);
         GpuContext* gpu = s->ops.gpu_context;
         ret = gpu_allocate_shared_buffer(gpu, &frame, size);
         if (ret < 0) {
-          err("%s: failed to allocate gpu memory on vram. ret: %d\n", __func__, ret);
+          err("%s: failed to allocate GPU memory on vram. ret: %d\n", __func__, ret);
           return -ENOMEM;
         }
       } else

Original file line number	Diff line number	Diff line change
`@@ -480,8 +480,8 @@ static int app_tx_fmd_init(struct st_app_context* ctx,`
`480`	`480`	`}`
`481`	`481`
`482`	`482`	`/* copying frame fields for RTP mode to function*/`
`483`		`- s->st41_dit = fmd->info.fmd_dit;`
`484`		`- s->st41_k_bit = fmd->info.fmd_k_bit;`
	`483`	`+ s->st41_dit = ops.fmd_dit;`
	`484`	`+ s->st41_k_bit = ops.fmd_k_bit;`
`485`	`485`
`486`	`486`	`s->handle = handle;`
`487`	`487`	`snprintf(s->st41_source_url, sizeof(s->st41_source_url), "%s",`