-
Notifications
You must be signed in to change notification settings - Fork 5k
docs: move udf sample code to docs #33819
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
| #include <string.h> | ||
| #include "taosudf.h" | ||
|
|
||
| DLL_EXPORT int32_t bit_and_init() { return 0; } | ||
|
|
||
| DLL_EXPORT int32_t bit_and_destroy() { return 0; } | ||
|
|
||
| DLL_EXPORT int32_t bit_and(SUdfDataBlock* block, SUdfColumn* resultCol) { | ||
| udfTrace("block:%p, processing begins, rows:%d cols:%d", block, block->numOfRows, block->numOfCols); | ||
|
|
||
| if (block->numOfCols < 2) { | ||
| udfError("block:%p, cols:%d needs to be greater than 2", block, block->numOfCols); | ||
| return TSDB_CODE_UDF_INVALID_INPUT; | ||
| } | ||
|
|
||
| for (int32_t i = 0; i < block->numOfCols; ++i) { | ||
| SUdfColumn* col = block->udfCols[i]; | ||
| if (col->colMeta.type != TSDB_DATA_TYPE_INT) { | ||
| udfError("block:%p, col:%d type:%d should be int(%d)", block, i, col->colMeta.type, TSDB_DATA_TYPE_INT); | ||
| return TSDB_CODE_UDF_INVALID_INPUT; | ||
| } | ||
| } | ||
|
|
||
| SUdfColumnData* resultData = &resultCol->colData; | ||
|
|
||
| for (int32_t i = 0; i < block->numOfRows; ++i) { | ||
| if (udfColDataIsNull(block->udfCols[0], i)) { | ||
| udfColDataSetNull(resultCol, i); | ||
| udfTrace("block:%p, row:%d result is null since col:0 is null", block, i); | ||
| continue; | ||
| } | ||
|
|
||
| int32_t result = *(int32_t*)udfColDataGetData(block->udfCols[0], i); | ||
| udfTrace("block:%p, row:%d col:0 data:%d", block, i, result); | ||
|
|
||
| int32_t j = 1; | ||
| for (; j < block->numOfCols; ++j) { | ||
| if (udfColDataIsNull(block->udfCols[j], i)) { | ||
| udfColDataSetNull(resultCol, i); | ||
| udfTrace("block:%p, row:%d result is null since col:%d is null", block, i, j); | ||
| break; | ||
| } | ||
|
|
||
| char* colData = udfColDataGetData(block->udfCols[j], i); | ||
| result &= *(int32_t*)colData; | ||
| udfTrace("block:%p, row:%d col:%d data:%d", block, i, j, *(int32_t*)colData); | ||
| } | ||
|
|
||
| if (j == block->numOfCols) { | ||
| udfColDataSet(resultCol, i, (char*)&result, false); | ||
| udfTrace("block:%p, row:%d result is %d", block, i, result); | ||
| } | ||
| } | ||
|
|
||
| resultData->numOfRows = block->numOfRows; | ||
| udfTrace("block:%p, processing completed", block); | ||
|
|
||
| return TSDB_CODE_SUCCESS; | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,11 @@ | ||||||||||||||
| set +e | ||||||||||||||
|
|
||||||||||||||
| rm -rf /tmp/udf/libbitand.so /tmp/udf/libsqrsum.so /tmp/udf/libgpd.so | ||||||||||||||
| mkdir -p /tmp/udf | ||||||||||||||
| echo "compile udf bit_and and sqr_sum" | ||||||||||||||
|
Comment on lines
+3
to
+5
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The script has inconsistencies. It attempts to remove
Suggested change
|
||||||||||||||
| gcc -fPIC -shared cases/12-UDFs/sh/bit_and.c -I../../include/libs/function/ -I../../include/client -I../../include/util -o /tmp/udf/libbitand.so | ||||||||||||||
| gcc -fPIC -shared cases/12-UDFs/sh/l2norm.c -I../../include/libs/function/ -I../../include/client -I../../include/util -o /tmp/udf/libl2norm.so | ||||||||||||||
| gcc -fPIC -shared cases/12-UDFs/sh/gpd.c -I../../include/libs/function/ -I../../include/client -I../../include/util -o /tmp/udf/libgpd.so | ||||||||||||||
|
Comment on lines
+6
to
+8
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The source file paths in the
Suggested change
|
||||||||||||||
| echo "debug show /tmp/udf/*.so" | ||||||||||||||
| ls /tmp/udf/*.so | ||||||||||||||
|
|
||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,128 @@ | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
| #include <string.h> | ||
| #include "taos.h" | ||
| #include "taoserror.h" | ||
| #include "taosudf.h" | ||
|
|
||
| // Define a structure to store sum and count | ||
| typedef struct { | ||
| double sum; | ||
| int count; | ||
| } SumCount; | ||
|
|
||
| // initialization function | ||
| DLL_EXPORT int32_t extract_avg_init() { | ||
| udfTrace("extract_avg_init: Initializing UDF"); | ||
| return TSDB_CODE_SUCCESS; | ||
| } | ||
|
|
||
| DLL_EXPORT int32_t extract_avg_start(SUdfInterBuf *interBuf) { | ||
| int32_t bufLen = sizeof(SumCount); | ||
| if (interBuf->bufLen < bufLen) { | ||
| udfError("extract_avg_start: Failed to execute UDF since input buflen:%d < %d", interBuf->bufLen, bufLen); | ||
| return TSDB_CODE_UDF_INVALID_BUFSIZE; | ||
| } | ||
|
|
||
| // Initialize sum and count | ||
| SumCount *sumCount = (SumCount *)interBuf->buf; | ||
| sumCount->sum = 0.0; | ||
| sumCount->count = 0; | ||
|
|
||
| interBuf->numOfResult = 0; | ||
|
|
||
| udfTrace("extract_avg_start: Initialized sum=0.0, count=0"); | ||
| return TSDB_CODE_SUCCESS; | ||
| } | ||
|
|
||
| DLL_EXPORT int32_t extract_avg(SUdfDataBlock *inputBlock, SUdfInterBuf *interBuf, SUdfInterBuf *newInterBuf) { | ||
| udfTrace("extract_avg: Processing data block with %d rows", inputBlock->numOfRows); | ||
|
|
||
| // Check the number of columns in the input data block | ||
| if (inputBlock->numOfCols != 1) { | ||
| udfError("extract_avg: Invalid number of columns. Expected 1, got %d", inputBlock->numOfCols); | ||
| return TSDB_CODE_UDF_INVALID_INPUT; | ||
| } | ||
|
|
||
| // Get the input column | ||
| SUdfColumn *inputCol = inputBlock->udfCols[0]; | ||
|
|
||
| if (inputCol->colMeta.type != TSDB_DATA_TYPE_VARCHAR) { | ||
| udfError("extract_avg: Invalid data type. Expected VARCHAR, got %d", inputCol->colMeta.type); | ||
| return TSDB_CODE_UDF_INVALID_INPUT; | ||
| } | ||
|
|
||
| // Read the current sum and count from interBuf | ||
| SumCount *sumCount = (SumCount *)interBuf->buf; | ||
| udfTrace("extract_avg: Starting with sum=%f, count=%d", sumCount->sum, sumCount->count); | ||
|
|
||
| for (int i = 0; i < inputBlock->numOfRows; i++) { | ||
| if (udfColDataIsNull(inputCol, i)) { | ||
| udfTrace("extract_avg: Skipping NULL value at row %d", i); | ||
| continue; | ||
| } | ||
|
|
||
| char *buf = (char *)udfColDataGetData(inputCol, i); | ||
|
|
||
| char data[64]; | ||
| memset(data, 0, 64); | ||
| memcpy(data, varDataVal(buf), varDataLen(buf)); | ||
|
Comment on lines
+67
to
+69
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is a critical buffer overflow vulnerability here. The char data[129];
snprintf(data, sizeof(data), "%.*s", (int)varDataLen(buf), varDataVal(buf)); |
||
|
|
||
| udfTrace("extract_avg: Processing row %d, data='%s'", i, data); | ||
|
|
||
| char *rest = data; | ||
| char *token; | ||
| while ((token = strtok_r(rest, ",", &rest))) { | ||
| while (*token == ' ') token++; | ||
| int tokenLen = strlen(token); | ||
| while (tokenLen > 0 && token[tokenLen - 1] == ' ') token[--tokenLen] = '\0'; | ||
|
|
||
| if (tokenLen == 0) { | ||
| udfTrace("extract_avg: Empty string encountered at row %d", i); | ||
| continue; | ||
| } | ||
|
|
||
| char *endPtr; | ||
| double value = strtod(token, &endPtr); | ||
|
|
||
| if (endPtr == token || *endPtr != '\0') { | ||
| udfError("extract_avg: Failed to convert string '%s' to double at row %d", token, i); | ||
| continue; | ||
| } | ||
|
|
||
| sumCount->sum += value; | ||
| sumCount->count++; | ||
| udfTrace("extract_avg: Updated sum=%f, count=%d", sumCount->sum, sumCount->count); | ||
| } | ||
| } | ||
|
|
||
| newInterBuf->bufLen = sizeof(SumCount); | ||
| newInterBuf->buf = (char *)malloc(newInterBuf->bufLen); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function allocates memory for |
||
| if (newInterBuf->buf == NULL) { | ||
| udfError("extract_avg: Failed to allocate memory for newInterBuf"); | ||
| return TSDB_CODE_UDF_INTERNAL_ERROR; | ||
| } | ||
| memcpy(newInterBuf->buf, sumCount, newInterBuf->bufLen); | ||
| newInterBuf->numOfResult = 0; | ||
|
|
||
| udfTrace("extract_avg: Final sum=%f, count=%d", sumCount->sum, sumCount->count); | ||
| return TSDB_CODE_SUCCESS; | ||
| } | ||
|
|
||
| DLL_EXPORT int32_t extract_avg_finish(SUdfInterBuf *interBuf, SUdfInterBuf *result) { | ||
| SumCount *sumCount = (SumCount *)interBuf->buf; | ||
|
|
||
| double avg = (sumCount->count > 0) ? (sumCount->sum / sumCount->count) : 0.0; | ||
|
|
||
| *(double *)result->buf = avg; | ||
| result->bufLen = sizeof(double); | ||
| result->numOfResult = sumCount->count > 0 ? 1 : 0; | ||
|
|
||
| udfTrace("extract_avg_finish: Final result=%f (sum=%f, count=%d)", avg, sumCount->sum, sumCount->count); | ||
| return TSDB_CODE_SUCCESS; | ||
| } | ||
|
|
||
| DLL_EXPORT int32_t extract_avg_destroy() { | ||
| udfTrace("extract_avg_destroy: Cleaning up UDF"); | ||
| return TSDB_CODE_SUCCESS; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| #include <string.h> | ||
| #include <stdlib.h> | ||
| #include <stdio.h> | ||
| #ifdef LINUX | ||
| #include <unistd.h> | ||
| #endif | ||
| #ifdef WINDOWS | ||
| #include <windows.h> | ||
| #endif | ||
| #include "taosudf.h" | ||
|
|
||
| TAOS* taos = NULL; | ||
|
|
||
| DLL_EXPORT int32_t gpd_init() { | ||
| return 0; | ||
| } | ||
|
|
||
| DLL_EXPORT int32_t gpd_destroy() { | ||
| return 0; | ||
| } | ||
|
|
||
| DLL_EXPORT int32_t gpd(SUdfDataBlock* block, SUdfColumn *resultCol) { | ||
| SUdfColumnMeta *meta = &resultCol->colMeta; | ||
| meta->bytes = 4; | ||
| meta->type = TSDB_DATA_TYPE_INT; | ||
| meta->scale = 0; | ||
| meta->precision = 0; | ||
|
|
||
| SUdfColumnData *resultData = &resultCol->colData; | ||
| resultData->numOfRows = block->numOfRows; | ||
| for (int32_t i = 0; i < resultData->numOfRows; ++i) { | ||
| int64_t* calc_ts = (int64_t*)udfColDataGetData(block->udfCols[0], i); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| char* varTbname = udfColDataGetData(block->udfCols[1], i); | ||
| char* varDbname = udfColDataGetData(block->udfCols[2], i); | ||
|
|
||
| char dbName[256] = {0}; | ||
| char tblName[256] = {0}; | ||
| memcpy(dbName, varDataVal(varDbname), varDataLen(varDbname)); | ||
| memcpy(tblName, varDataVal(varTbname), varDataLen(varTbname)); | ||
|
Comment on lines
+38
to
+39
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is a critical buffer overflow vulnerability here. snprintf(dbName, sizeof(dbName), "%.*s", (int)varDataLen(varDbname), varDataVal(varDbname));
snprintf(tblName, sizeof(tblName), "%.*s", (int)varDataLen(varTbname), varDataVal(varTbname)); |
||
| printf("%s, %s\n", dbName, tblName); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| int32_t result = 0; | ||
| udfColDataSet(resultCol, i, (char*)&result, false); | ||
| } | ||
|
|
||
| return 0; | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The column count check is incorrect. The documentation states that the function should work for a single column, but the current check
block->numOfCols < 2causes it to fail with an error for one column. Additionally, the error messageneeds to be greater than 2is confusing and inconsistent with the check. The check should be for at least one column.