
server: ci: windows build and tests #5968

Merged 10 commits on Mar 10, 2024
46 changes: 45 additions & 1 deletion .github/workflows/server.yml
@@ -47,6 +47,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Dependencies
id: depends
@@ -58,7 +60,6 @@
cmake \
python3-pip \
wget \
psmisc \
language-pack-en

- name: Build
@@ -90,3 +91,46 @@ jobs:
run: |
cd examples/server/tests
PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow


server-windows:
runs-on: windows-latest

steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Build
id: cmake_build
run: |
mkdir build
cd build
cmake .. -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release ;
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server

- name: Python setup
id: setup_python
uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Tests dependencies
id: test_dependencies
run: |
pip install -r examples/server/tests/requirements.txt

- name: Tests
id: server_integration_tests
run: |
cd examples/server/tests
behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp

- name: Slow tests
id: server_integration_tests_slow
if: ${{ github.event.schedule != '' || github.event.inputs.slow_tests == 'true' }}
run: |
cd examples/server/tests
behave.exe --stop --no-skipped --no-capture --tags slow
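
Note: the new Windows job drives the same behave suite as the Linux job, just invoked directly instead of through tests.sh. As a hedged sketch, the "Tests" step could be reproduced locally roughly like this (assumes behave is installed via requirements.txt and the server binary is already built; the script itself is illustrative, not part of the PR):

```python
# Sketch: rerun the CI "Tests" step locally. The flags mirror the
# workflow step above; behave must be on PATH.
import subprocess
import sys

result = subprocess.run(
    ["behave", "--summary", "--stop", "--no-capture",
     "--exclude", "issues|wrong_usages|passkey",
     "--tags", "llama.cpp"],
    cwd="examples/server/tests",  # same working directory as the workflow
)
sys.exit(result.returncode)
```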
66 changes: 47 additions & 19 deletions examples/server/tests/features/environment.py
@@ -1,9 +1,10 @@
import errno
import os
import socket
import subprocess
import time
from contextlib import closing
from signal import SIGKILL
import signal


def before_scenario(context, scenario):
@@ -29,44 +30,71 @@ def after_scenario(context, scenario):
for line in f:
print(line)
if not is_server_listening(context.server_fqdn, context.server_port):
print("\x1b[33;101mERROR: Server stopped listening\x1b[0m")
print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n")

if not pid_exists(context.server_process.pid):
assert False, f"Server not running pid={context.server_process.pid} ..."

print(f"stopping server pid={context.server_process.pid} ...")
context.server_process.kill()
server_graceful_shutdown(context)

# Wait briefly for the socket to free up
time.sleep(0.05)

attempts = 0
while is_server_listening(context.server_fqdn, context.server_port):
print(f"stopping server pid={context.server_process.pid} ...")
os.kill(context.server_process.pid, SIGKILL)
while pid_exists(context.server_process.pid) or is_server_listening(context.server_fqdn, context.server_port):
server_kill(context)
time.sleep(0.1)
attempts += 1
if attempts > 5:
print(f"Server dangling exits, killing all {context.server_path} ...")
process = subprocess.run(['killall', '-9', context.server_path],
stderr=subprocess.PIPE,
universal_newlines=True)
print(process)
server_kill_hard(context)


def server_graceful_shutdown(context):
print(f"shutting down server pid={context.server_process.pid} ...\n")
if os.name == 'nt':
os.kill(context.server_process.pid, signal.CTRL_C_EVENT)
else:
os.kill(context.server_process.pid, signal.SIGINT)


def server_kill(context):
print(f"killing server pid={context.server_process.pid} ...\n")
context.server_process.kill()


def server_kill_hard(context):
pid = context.server_process.pid
path = context.server_path

print(f"Server dangling exits, hard killing force {pid}={path}...\n")
if os.name == 'nt':
process = subprocess.check_output(['taskkill', '/F', '/pid', str(pid)]).decode()
print(process)
else:
os.kill(-pid, signal.SIGKILL)


def is_server_listening(server_fqdn, server_port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
result = sock.connect_ex((server_fqdn, server_port))
return result == 0
_is_server_listening = result == 0
if _is_server_listening:
print(f"server is listening on {server_fqdn}:{server_port}...\n")
return _is_server_listening


def pid_exists(pid):
"""Check whether pid exists in the current process table."""
import errno
if pid < 0:
return False
try:
os.kill(pid, 0)
except OSError as e:
return e.errno == errno.EPERM
if os.name == 'nt':
output = subprocess.check_output(['TASKLIST', '/FI', f'pid eq {pid}']).decode()
print(output)
return "No tasks are running" not in output
else:
return True
try:
os.kill(pid, 0)
except OSError as e:
return e.errno == errno.EPERM
else:
return True
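
One Windows subtlety behind `server_graceful_shutdown` above: `CTRL_C_EVENT` is delivered to an entire console process group, so it only reaches the server if the test runner and the server share a console. A common alternative is to start the child in its own process group and send `CTRL_BREAK_EVENT` instead. A minimal cross-platform sketch of that pattern (illustrative only, not the suite's actual launch code):

```python
# Sketch: spawn a child so it can later be interrupted cleanly on both
# POSIX and Windows. CREATE_NEW_PROCESS_GROUP makes CTRL_BREAK_EVENT
# target only this child instead of the whole console.
import os
import signal
import subprocess
import sys

def spawn(cmd):
    if os.name == 'nt':
        return subprocess.Popen(
            cmd, creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
    return subprocess.Popen(cmd)

def graceful_shutdown(proc):
    if os.name == 'nt':
        proc.send_signal(signal.CTRL_BREAK_EVENT)  # console control event
    else:
        proc.send_signal(signal.SIGINT)            # same as Ctrl-C

if __name__ == "__main__":
    child = spawn([sys.executable, "-c", "import time; time.sleep(60)"])
    graceful_shutdown(child)
    child.wait(timeout=10)
    print(f"child exited with {child.returncode}")
```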
2 changes: 1 addition & 1 deletion examples/server/tests/features/server.feature
@@ -47,7 +47,7 @@ Feature: llama.cpp server
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
"""
And a completion request with no api error
Then 64 tokens are predicted matching fun|Annaks|popcorns
Then 64 tokens are predicted matching fun|Annaks|popcorns|pictry
And the completion is truncated
And 109 prompt tokens are processed

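
For context, the "64 tokens are predicted matching ..." step in the feature above takes a regex alternation, so the scenario passes if any one of the listed strings appears in the completion. A hypothetical sketch of that check (the helper name is illustrative, not the suite's actual step implementation):

```python
# Sketch: how a "tokens are predicted matching <pattern>" assertion
# could reduce to a regex search over the generated text.
import re

def assert_completion_matches(completion_text, pattern):
    assert re.search(pattern, completion_text), \
        f"no match for /{pattern}/ in: {completion_text!r}"

assert_completion_matches(
    "so much fun at the fair",         # toy completion
    "fun|Annaks|popcorns|pictry",      # alternation from the feature file
)
```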