Skip to content

Commit 2632493

Browse files
authored
Consolidate entrypoint to support broader deployment scenarios (#6566)
### What problem does this PR solve? This PR gives better control over which services are loaded in each container. With this approach, we can create some containers that run only the web server and others that run only the task executor. It also introduces a unique ID per task executor host; this will be important when scaling task executors horizontally, since unique task executor IDs will be required. The new `entrypoint.sh` maintains the default behavior of starting the web server and task executor on the same host. ### Type of change - [ ] Bug Fix (non-breaking change which fixes an issue) - [X] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [ ] Performance Improvement - [ ] Other (please describe):
1 parent c61df5d commit 2632493

File tree

3 files changed

+129
-54
lines changed

3 files changed

+129
-54
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ COPY agentic_reasoning agentic_reasoning
201201
COPY pyproject.toml uv.lock ./
202202

203203
COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
204-
COPY docker/entrypoint.sh docker/entrypoint-parser.sh ./
204+
COPY docker/entrypoint.sh ./
205205
RUN chmod +x ./entrypoint*.sh
206206

207207
# Copy compiled web pages

docker/entrypoint-parser.sh

Lines changed: 0 additions & 29 deletions
This file was deleted.

docker/entrypoint.sh

100755100644
Lines changed: 128 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,139 @@
1-
#!/bin/bash
1+
#!/usr/bin/env bash
22

3-
# replace env variables in the service_conf.yaml file
4-
rm -rf /ragflow/conf/service_conf.yaml
5-
while IFS= read -r line || [[ -n "$line" ]]; do
6-
# Use eval to interpret the variable with default values
7-
eval "echo \"$line\"" >> /ragflow/conf/service_conf.yaml
8-
done < /ragflow/conf/service_conf.yaml.template
3+
set -e
94

10-
/usr/sbin/nginx
5+
# -----------------------------------------------------------------------------
6+
# Usage and command-line argument parsing
7+
# -----------------------------------------------------------------------------
8+
function usage() {
9+
echo "Usage: $0 [--disable-webserver] [--disable-taskexecutor] [--consumer-no-beg=<num>] [--consumer-no-end=<num>] [--workers=<num>] [--host-id=<string>]"
10+
echo
11+
echo " --disable-webserver Disables the web server (nginx + ragflow_server)."
12+
echo " --disable-taskexecutor Disables task executor workers."
13+
echo " --consumer-no-beg=<num> Start range for consumers (if using range-based)."
14+
echo " --consumer-no-end=<num> End range for consumers (if using range-based)."
15+
echo " --workers=<num> Number of task executors to run (if range is not used)."
16+
echo " --host-id=<string> Unique ID for the host (defaults to \`hostname\`)."
17+
echo
18+
echo "Examples:"
19+
echo " $0 --disable-taskexecutor"
20+
echo " $0 --disable-webserver --consumer-no-beg=0 --consumer-no-end=5"
21+
echo " $0 --disable-webserver --workers=2 --host-id=myhost123"
22+
exit 1
23+
}
1124

12-
export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/
25+
ENABLE_WEBSERVER=1 # Default to enable web server
26+
ENABLE_TASKEXECUTOR=1 # Default to enable task executor
27+
CONSUMER_NO_BEG=0
28+
CONSUMER_NO_END=0
29+
WORKERS=1
1330

14-
PY=python3
15-
if [[ -z "$WS" || $WS -lt 1 ]]; then
16-
WS=1
31+
# -----------------------------------------------------------------------------
32+
# Host ID logic:
33+
# 1. By default, use the system hostname if length <= 32
34+
# 2. Otherwise, use the full MD5 hash of the hostname (32 hex chars)
35+
# -----------------------------------------------------------------------------
36+
CURRENT_HOSTNAME="$(hostname)"
37+
if [ ${#CURRENT_HOSTNAME} -le 32 ]; then
38+
DEFAULT_HOST_ID="$CURRENT_HOSTNAME"
39+
else
40+
DEFAULT_HOST_ID="$(echo -n "$CURRENT_HOSTNAME" | md5sum | cut -d ' ' -f 1)"
1741
fi
1842

19-
function task_exe(){
20-
JEMALLOC_PATH=$(pkg-config --variable=libdir jemalloc)/libjemalloc.so
21-
while [ 1 -eq 1 ];do
22-
LD_PRELOAD=$JEMALLOC_PATH $PY rag/svr/task_executor.py $1;
43+
HOST_ID="$DEFAULT_HOST_ID"
44+
45+
# Parse arguments
46+
for arg in "$@"; do
47+
case $arg in
48+
--disable-webserver)
49+
ENABLE_WEBSERVER=0
50+
shift
51+
;;
52+
--disable-taskexecutor)
53+
ENABLE_TASKEXECUTOR=0
54+
shift
55+
;;
56+
--consumer-no-beg=*)
57+
CONSUMER_NO_BEG="${arg#*=}"
58+
shift
59+
;;
60+
--consumer-no-end=*)
61+
CONSUMER_NO_END="${arg#*=}"
62+
shift
63+
;;
64+
--workers=*)
65+
WORKERS="${arg#*=}"
66+
shift
67+
;;
68+
--host-id=*)
69+
HOST_ID="${arg#*=}"
70+
shift
71+
;;
72+
*)
73+
usage
74+
;;
75+
esac
76+
done
77+
78+
# -----------------------------------------------------------------------------
79+
# Replace env variables in the service_conf.yaml file
80+
# -----------------------------------------------------------------------------
81+
CONF_DIR="/ragflow/conf"
82+
TEMPLATE_FILE="${CONF_DIR}/service_conf.yaml.template"
83+
CONF_FILE="${CONF_DIR}/service_conf.yaml"
84+
85+
rm -f "${CONF_FILE}"
86+
while IFS= read -r line || [[ -n "$line" ]]; do
87+
eval "echo \"$line\"" >> "${CONF_FILE}"
88+
done < "${TEMPLATE_FILE}"
89+
90+
export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu/"
91+
PY=python3
92+
93+
# -----------------------------------------------------------------------------
94+
# Function(s)
95+
# -----------------------------------------------------------------------------
96+
97+
function task_exe() {
98+
local consumer_id="$1"
99+
local host_id="$2"
100+
101+
JEMALLOC_PATH="$(pkg-config --variable=libdir jemalloc)/libjemalloc.so"
102+
while true; do
103+
LD_PRELOAD="$JEMALLOC_PATH" \
104+
"$PY" rag/svr/task_executor.py "${host_id}_${consumer_id}"
23105
done
24106
}
25107

26-
for ((i=0;i<WS;i++))
27-
do
28-
task_exe $i &
29-
done
108+
# -----------------------------------------------------------------------------
109+
# Start components based on flags
110+
# -----------------------------------------------------------------------------
30111

31-
while [ 1 -eq 1 ];do
32-
$PY api/ragflow_server.py
33-
done
112+
if [[ "${ENABLE_WEBSERVER}" -eq 1 ]]; then
113+
echo "Starting nginx..."
114+
/usr/sbin/nginx
115+
116+
echo "Starting ragflow_server..."
117+
while true; do
118+
"$PY" api/ragflow_server.py
119+
done &
120+
fi
121+
122+
if [[ "${ENABLE_TASKEXECUTOR}" -eq 1 ]]; then
123+
if [[ "${CONSUMER_NO_END}" -gt "${CONSUMER_NO_BEG}" ]]; then
124+
echo "Starting task executors on host '${HOST_ID}' for IDs in [${CONSUMER_NO_BEG}, ${CONSUMER_NO_END})..."
125+
for (( i=CONSUMER_NO_BEG; i<CONSUMER_NO_END; i++ ))
126+
do
127+
task_exe "${i}" "${HOST_ID}" &
128+
done
129+
else
130+
# Otherwise, start a fixed number of workers
131+
echo "Starting ${WORKERS} task executor(s) on host '${HOST_ID}'..."
132+
for (( i=0; i<WORKERS; i++ ))
133+
do
134+
task_exe "${i}" "${HOST_ID}" &
135+
done
136+
fi
137+
fi
34138

35-
wait;
139+
wait

0 commit comments

Comments
 (0)