Skip to content

v0.12.0-ce部署模型失败(deploy model fail) #1430

@caizhenghao

Description

@caizhenghao

Describe the bug
部署模型时会失败,但页面和日志文件中都没有任何错误提示。

Environment
CSGHub Version: v0.12.0
OS: Linux (openEuler 22.03)
Hardware: 16c32g
Launch: 使用 Docker Compose 部署 CSGHub,使用 Helm 部署 runner 服务

页面情况:部署失败,日志为空
Image

runner日志

10.42.0.1 - - [21/Nov/2025:04:14:19 +0000] "GET /api/v1/cluster/f90e1c6c-799d-4c24-a342-ee900e0b4950 HTTP/1.1" 200 441 "-" "Go-http-client/1.1" 343 0.014 [csghub-runner-runner-8082] [] 10.42.0.240:8082 441 0.014 200 b0ba5ff56d8dce7c7771c2a5e6758497
10.42.0.1 - - [21/Nov/2025:04:14:19 +0000] "POST /api/v1/service/f4ragh1harr4/run HTTP/1.1" 200 42 "-" "Go-http-client/1.1" 1358 0.038 [csghub-runner-runner-8082] [] 10.42.0.240:8082 42 0.038 200 801560dee4f3172d0cbfdc872ef66e39
10.42.0.1 - - [21/Nov/2025:04:14:19 +0000] "GET /api/v1/cluster/f90e1c6c-799d-4c24-a342-ee900e0b4950 HTTP/1.1" 200 441 "-" "Go-http-client/1.1" 343 0.011 [csghub-runner-runner-8082] [] 10.42.0.240:8082 441 0.011 200 d26197c3d7e931cd5f79b0cd1a41dd14
10.42.0.1 - - [21/Nov/2025:04:14:19 +0000] "GET /api/v1/service/f4ragh1harr4/replica HTTP/1.1" 200 104 "-" "Go-http-client/1.1" 525 0.002 [csghub-runner-runner-8082] [] 10.42.0.240:8082 104 0.002 200 e538222e7f6b8de205fa9b6c32749db1
10.42.0.1 - - [21/Nov/2025:04:14:19 +0000] "GET /api/v1/service/f4ragh1harr4/get HTTP/1.1" 200 221 "-" "Go-http-client/1.1" 459 0.001 [csghub-runner-runner-8082] [] 10.42.0.240:8082 221 0.001 200 4c2e3b041070890089d18e41f7abffed
10.42.0.1 - - [21/Nov/2025:04:14:25 +0000] "GET /api/v1/service/f4ragh1harr4/get HTTP/1.1" 200 221 "-" "Go-http-client/1.1" 459 0.002 [csghub-runner-runner-8082] [] 10.42.0.240:8082 221 0.002 200 dca2c6ba2a511dd8aa3d932138f0308c
10.42.0.1 - - [21/Nov/2025:04:14:30 +0000] "GET /api/v1/service/f4ragh1harr4/get HTTP/1.1" 200 260 "-" "Go-http-client/1.1" 459 0.002 [csghub-runner-runner-8082] [] 10.42.0.240:8082 260 0.002 200 7a886b7e25a6d64b81baa848de18e289
10.42.0.1 - - [21/Nov/2025:04:14:30 +0000] "GET /api/v1/service/f4ragh1harr4/replica HTTP/1.1" 200 104 "-" "Go-http-client/1.1" 525 0.001 [csghub-runner-runner-8082] [] 10.42.0.240:8082 104 0.001 200 fb4f6b5d95ac1ba1b818da26e08bc126
10.42.0.1 - - [21/Nov/2025:04:14:30 +0000] "GET /api/v1/service/f4ragh1harr4/get HTTP/1.1" 200 260 "-" "Go-http-client/1.1" 459 0.002 [csghub-runner-runner-8082] [] 10.42.0.240:8082 260 0.001 200 25d6228776d5908717a369cbf97f3ec7
10.42.0.1 - - [21/Nov/2025:04:14:35 +0000] "GET /api/v1/service/f4ragh1harr4/get HTTP/1.1" 200 260 "-" "Go-http-client/1.1" 459 0.002 [csghub-runner-runner-8082] [] 10.42.0.240:8082 260 0.002 200 1e7ed06f23aadd06630326466b704499
10.42.0.1 - - [21/Nov/2025:04:14:40 +0000] "GET /api/v1/service/f4ragh1harr4/get HTTP/1.1" 200 260 "-" "Go-http-client/1.1" 459 0.001 [csghub-runner-runner-8082] [] 10.42.0.240:8082 260 0.001 200 8ac1353e82a7a9db980d5ff012ebcf6a

server日志

2025-11-21_04:14:19.10267 {"time":"2025-11-21T04:14:19.102607249Z","level":"WARN","msg":"Log entry dropped because log collector is not ready"}
2025-11-21_04:14:19.10278 {"time":"2025-11-21T04:14:19.102735877Z","level":"INFO","msg":"http request","ip":"10.11.9.137","method":"POST","latency(ms)":31,"status":200,"current_user":"root","auth_type":"JWT","url":"/api/v1/models/root/Qwen2.5-0.5B-Instruct/run","full_path":"/api/v1/models/:namespace/:name/run","trace_id":"47f3d81e-162d-4c12-aaee-199a350a60d0"}
2025-11-21_04:14:19.14949 {"time":"2025-11-21T04:14:19.149432572Z","level":"INFO","msg":"http request","ip":"172.23.0.1","method":"GET","latency(ms)":3,"status":200,"current_user":"","auth_type":"ApiKey","url":"/api/v1/user/63b7c9d1-b177-4116-852a-3ecd9467372d?type=uuid","full_path":"/api/v1/user/:username","trace_id":"35839a94-9b45-4fe2-a4b5-d7da7d6734f7"}
2025-11-21_04:14:19.24657 {"time":"2025-11-21T04:14:19.246470323Z","level":"INFO","msg":"http request","ip":"172.23.0.1","method":"POST","latency(ms)":1,"status":200,"current_user":"","auth_type":"ApiKey","url":"/api/v1/webhook/runner","full_path":"/api/v1/webhook/runner","trace_id":"77e472f40e9949228166aa59daafa340"}
2025-11-21_04:14:19.24660 {"time":"2025-11-21T04:14:19.246527499Z","level":"INFO","msg":"deploy_event_received","event":{"event_type":"runner.service.create","event_time":1763698459,"cluster_id":"f90e1c6c-799d-4c24-a342-ee900e0b4950","runner_name":"","data_type":"object","data":{"service_name":"f4ragh1harr4","status":20,"endpoint":"","message":"","reason":"create","task_id":6}}}
2025-11-21_04:14:19.26470 {"time":"2025-11-21T04:14:19.264563141Z","level":"INFO","msg":"http request","ip":"172.23.0.1","method":"POST","latency(ms)":0,"status":200,"current_user":"","auth_type":"ApiKey","url":"/api/v1/webhook/runner","full_path":"/api/v1/webhook/runner","trace_id":"65283d1082f24c98b9859d761884f275"}
2025-11-21_04:14:19.26507 {"time":"2025-11-21T04:14:19.264911781Z","level":"INFO","msg":"deploy_event_received","event":{"event_type":"runner.service.create","event_time":1763698459,"cluster_id":"f90e1c6c-799d-4c24-a342-ee900e0b4950","runner_name":"","data_type":"object","data":{"service_name":"f4ragh1harr4","status":20,"endpoint":"","message":"","reason":"create","task_id":6}}}
2025-11-21_04:14:19.75873 {"time":"2025-11-21T04:14:19.758653862Z","level":"INFO","msg":"http request","ip":"10.11.9.137","method":"GET","latency(ms)":0,"status":200,"current_user":"root","auth_type":"JWT","url":"/api/v1/broadcasts/active","full_path":"/api/v1/broadcasts/active","trace_id":"2a40d51c-c7d8-4810-8207-7a0a95902fb4"}
2025-11-21_04:14:19.76079 {"time":"2025-11-21T04:14:19.760746585Z","level":"INFO","msg":"http request","ip":"10.11.9.137","method":"GET","latency(ms)":2,"status":200,"current_user":"root","auth_type":"JWT","url":"/api/v1/user/63b7c9d1-b177-4116-852a-3ecd9467372d?type=uuid","full_path":"/api/v1/user/:username","trace_id":"7b3f5854-7ff3-470c-bc2b-a6561eb2d06b"}
2025-11-21_04:14:19.77138 {"time":"2025-11-21T04:14:19.771327313Z","level":"INFO","msg":"http request","ip":"10.11.9.137","method":"GET","latency(ms)":5,"status":200,"current_user":"root","auth_type":"JWT","url":"/api/v1/tags","full_path":"/api/v1/tags","trace_id":"9ef0a2ea-b06b-4fac-b192-e2cd21c3aab9"}
2025-11-21_04:14:19.77182 {"time":"2025-11-21T04:14:19.771782566Z","level":"INFO","msg":"Get space resources successfully"}
2025-11-21_04:14:19.77189 {"time":"2025-11-21T04:14:19.771853984Z","level":"INFO","msg":"http request","ip":"10.11.9.137","method":"GET","latency(ms)":13,"status":200,"current_user":"root","auth_type":"JWT","url":"/api/v1/space_resources?cluster_id=","full_path":"/api/v1/space_resources","trace_id":"662904e7-e91b-45d1-8d72-3ded96caf12e"}
2025-11-21_04:14:19.77551 {"time":"2025-11-21T04:14:19.775471155Z","level":"INFO","msg":"http request","ip":"10.11.9.137","method":"GET","latency(ms)":17,"status":200,"current_user":"root","auth_type":"JWT","url":"/api/v1/models/root/Qwen2.5-0.5B-Instruct/run/2","full_path":"/api/v1/models/:namespace/:name/run/:id","trace_id":"8846928a-70c9-4683-92e8-2083776f9972"}
2025-11-21_04:14:20.06438 {"time":"2025-11-21T04:14:20.064213382Z","level":"INFO","msg":"http request","ip":"10.11.9.137","method":"GET","latency(ms)":1,"status":200,"current_user":"root","auth_type":"JWT","url":"/api/v1/notifications/message-types","full_path":"/api/v1/notifications/message-types","trace_id":"417f0b2c-780e-471b-988d-9fb0f133d485"}
2025-11-21_04:14:20.06535 {"time":"2025-11-21T04:14:20.065313124Z","level":"INFO","msg":"http request","ip":"10.11.9.137","method":"GET","latency(ms)":2,"status":200,"current_user":"root","auth_type":"JWT","url":"/api/v1/notifications/count","full_path":"/api/v1/notifications/count","trace_id":"0fc8555b-a5b8-45e2-a318-779fec3f1904"}
2025-11-21_04:14:20.07858 {"time":"2025-11-21T04:14:20.078390409Z","level":"INFO","msg":"http request","trace_id":"22b6070c9d244bd29021ba533504607e","method":"GET","url":"http://127.0.0.1:8088/api/v1/namespace/root","status":200,"latency(ms)":1}
2025-11-21_04:14:20.08307 {"time":"2025-11-21T04:14:20.082954499Z","level":"INFO","msg":"Get model succeed","model":"Qwen2.5-0.5B-Instruct"}
2025-11-21_04:14:20.08315 {"time":"2025-11-21T04:14:20.083048404Z","level":"INFO","msg":"http request","ip":"10.11.9.137","method":"GET","latency(ms)":0,"status":200,"current_user":"root","auth_type":"JWT","url":"/api/v1/version","full_path":"/api/v1/version","trace_id":"f4a7d78a-f772-41f3-8181-e3063b67a060"}
2025-11-21_04:14:20.08360 {"time":"2025-11-21T04:14:20.083404138Z","level":"INFO","msg":"http request","ip":"10.11.9.137","method":"GET","latency(ms)":15,"status":200,"current_user":"root","auth_type":"JWT","url":"/api/v1/models/root/Qwen2.5-0.5B-Instruct","full_path":"/api/v1/models/:namespace/:name","trace_id":"8c23ffa0-e3ec-43e0-8b6f-3b216ba37663"}
2025-11-21_04:14:20.08620 {"time":"2025-11-21T04:14:20.086037329Z","level":"INFO","msg":"http request","ip":"10.11.9.137","method":"GET","latency(ms)":3,"status":200,"current_user":"root","auth_type":"JWT","url":"/api/v1/notifications/poll/1?timezone=Asia/Shanghai","full_path":"/api/v1/notifications/poll/:limit","trace_id":"1acc6d74-3155-4f62-8e0c-0e84b8c8bc5d"}
2025-11-21_04:14:29.38229 {"time":"2025-11-21T04:14:29.382229348Z","level":"INFO","msg":"http request","ip":"172.23.0.1","method":"POST","latency(ms)":1,"status":200,"current_user":"","auth_type":"ApiKey","url":"/api/v1/webhook/runner","full_path":"/api/v1/webhook/runner","trace_id":"97872e6f1b2b4261b0d3e67e3291a2e2"}
2025-11-21_04:14:29.38260 {"time":"2025-11-21T04:14:29.382391536Z","level":"INFO","msg":"deploy_event_received","event":{"event_type":"runner.service.change","event_time":1763698469,"cluster_id":"f90e1c6c-799d-4c24-a342-ee900e0b4950","runner_name":"","data_type":"object","data":{"service_name":"f4ragh1harr4","status":21,"endpoint":"http://f4ragh1harr4.spaces.app.internal","message":"","reason":"","task_id":6}}}
2025-11-21_04:14:30.09887 {"time":"2025-11-21T04:14:30.098812143Z","level":"INFO","msg":"http request","ip":"10.11.9.137","method":"GET","latency(ms)":7,"status":200,"current_user":"root","auth_type":"JWT","url":"/api/v1/models/root/Qwen2.5-0.5B-Instruct/run/2","full_path":"/api/v1/models/:namespace/:name/run/:id","trace_id":"3750b1d5-ffd7-4714-8fbf-21cb0069d6e7"}
2025-11-21_04:14:33.70744 {"time":"2025-11-21T04:14:33.707384037Z","level":"INFO","msg":"http request","ip":"172.23.0.1","method":"POST","latency(ms)":0,"status":200,"current_user":"","auth_type":"ApiKey","url":"/api/v1/webhook/runner","full_path":"/api/v1/webhook/runner","trace_id":"848b33fda49e44409b2158b972d7e262"}
2025-11-21_04:14:33.70801 {"time":"2025-11-21T04:14:33.707968979Z","level":"INFO","msg":"cluster_event_received","event":{"event_type":"runner.cluster.update","event_time":1763698473,"cluster_id":"f90e1c6c-799d-4c24-a342-ee900e0b4950","runner_name":"","data_type":"object","data":{"cluster_id":"f90e1c6c-799d-4c24-a342-ee900e0b4950","cluster_config":"config","region":"cn-north-1","zone":"","provider":"","enable":false,"storage_class":"","status":"Running","endpoint":"http://runner.trainpla.local:30080","network_interface":"","mode":"incluster","app_endpoint":"http://10.43.169.116"}}}
2025-11-21_04:14:33.70806 {"time":"2025-11-21T04:14:33.708018464Z","level":"INFO","msg":"processing cluster event","event":{"cluster_id":"f90e1c6c-799d-4c24-a342-ee900e0b4950","cluster_config":"config","region":"cn-north-1","zone":"","provider":"","enable":false,"storage_class":"","status":"Running","endpoint":"http://runner.trainpla.local:30080","network_interface":"","mode":"incluster","app_endpoint":"http://10.43.169.116"}}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions