server: ci: windows build and tests #5968

Merged
10 commits merged on Mar 10, 2024
43 changes: 43 additions & 0 deletions .github/workflows/server.yml
@@ -90,3 +90,46 @@ jobs:
run: |
cd examples/server/tests
PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow


server-windows:
runs-on: windows-latest

steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Build
id: cmake_build
run: |
mkdir build
cd build
cmake .. -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release ;
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server

- name: Python setup
id: setup_python
uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Tests dependencies
id: test_dependencies
run: |
pip install -r examples/server/tests/requirements.txt

- name: Tests
id: server_integration_tests
run: |
cd examples/server/tests
behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp

- name: Slow tests
id: server_integration_tests_slow
if: ${{ github.event.schedule != '' || github.event.inputs.slow_tests == 'true' }}
run: |
cd examples/server/tests
behave.exe --stop --no-skipped --no-capture --tags slow
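
The new server-windows job mirrors the Linux one: build the server target with CMake in Release mode, install the Python test dependencies, then drive the behave suite. As a minimal sketch (assuming a local repository checkout and a completed Release build of the server target), the CI "Tests" step can be reproduced from Python like so:

# Sketch: mirror the CI "Tests" step on a local Windows checkout.
# Assumes `cmake --build . --config Release --target server` has already run.
import subprocess

subprocess.run(
    ['behave.exe', '--summary', '--stop', '--no-capture',
     '--exclude', 'issues|wrong_usages|passkey', '--tags', 'llama.cpp'],
    cwd='examples/server/tests',  # feature files live here
    check=True,                   # a non-zero exit fails the run, as in CI
)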
32 changes: 22 additions & 10 deletions examples/server/tests/features/environment.py
@@ -3,7 +3,7 @@
import subprocess
import time
from contextlib import closing
-from signal import SIGKILL
+import signal


def before_scenario(context, scenario):
@@ -29,31 +29,43 @@ def after_scenario(context, scenario):
                for line in f:
                    print(line)
        if not is_server_listening(context.server_fqdn, context.server_port):
-            print("\x1b[33;101mERROR: Server stopped listening\x1b[0m")
+            print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n")

    if not pid_exists(context.server_process.pid):
        assert False, f"Server not running pid={context.server_process.pid} ..."

-    print(f"stopping server pid={context.server_process.pid} ...")
-    context.server_process.kill()
+    kill_server(context)

    # Wait a moment for the socket to free up
    time.sleep(0.05)

    attempts = 0
    while is_server_listening(context.server_fqdn, context.server_port):
-        print(f"stopping server pid={context.server_process.pid} ...")
-        os.kill(context.server_process.pid, SIGKILL)
+        context.server_process.kill()
        time.sleep(0.1)
        attempts += 1
        if attempts > 5:
-            print(f"Server dangling exits, killing all {context.server_path} ...")
-            process = subprocess.run(['killall', '-9', context.server_path],
-                                     stderr=subprocess.PIPE,
-                                     universal_newlines=True)
+            if os.name == 'nt':
+                print(f"Server dangling exits, task killing force {context.server_process.pid} ...\n")
+                process = subprocess.run(['taskkill', '/F', '/pid', str(context.server_process.pid)],
+                                         stderr=subprocess.PIPE)
+            else:
+                print(f"Server dangling exits, killing all {context.server_path} ...\n")
+                process = subprocess.run(['killall', '-9', context.server_path],
+                                         stderr=subprocess.PIPE)
            print(process)


def kill_server(context):
    print(f"stopping server pid={context.server_process.pid} ...\n")
    if os.name == 'nt':
        os.kill(context.server_process.pid, signal.CTRL_C_EVENT)
    else:
        os.kill(context.server_process.pid, signal.SIGKILL)
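
There is no SIGKILL on Windows, so the helper falls back to a console control event; sending CTRL_C_EVENT by pid interacts with console process groups (see the creation flags added further down in steps.py), and its delivery semantics on Windows are notoriously subtle. For comparison, a purely illustrative, hypothetical variant (not what this PR does) could stay on the portable Popen API instead of raw signals:

# Hypothetical alternative sketch, not part of the PR: terminate() maps to
# TerminateProcess on Windows and SIGTERM on POSIX, so no os.name branch
# is needed; kill() is the hard fallback.
import subprocess

def kill_server_portable(context, timeout=10):
    proc = context.server_process
    print(f"stopping server pid={proc.pid} ...\n")
    proc.terminate()
    try:
        proc.wait(timeout=timeout)   # give the server time to exit cleanly
    except subprocess.TimeoutExpired:
        proc.kill()                  # hard kill as a last resort
        proc.wait()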


def is_server_listening(server_fqdn, server_port):
    print(f"is server listening on {server_fqdn}:{server_port}...\n")
    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
        result = sock.connect_ex((server_fqdn, server_port))
        return result == 0
2 changes: 1 addition & 1 deletion examples/server/tests/features/server.feature
@@ -47,7 +47,7 @@ Feature: llama.cpp server
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
"""
And a completion request with no api error
-Then 64 tokens are predicted matching fun|Annaks|popcorns
+Then 64 tokens are predicted matching fun|Annaks|popcorns|pictry
And the completion is truncated
And 109 prompt tokens are processed

45 changes: 33 additions & 12 deletions examples/server/tests/features/steps/steps.py
@@ -285,17 +285,17 @@ def step_seed(context, seed):

@step(u'a prefix prompt')
def step_prompt_prefix(context):
-    context.prompt_prefix = context.text
+    context.prompt_prefix = context_text(context)


@step(u'a junk suffix prompt')
def step_prompt_junk_suffix(context):
-    context.prompt_junk_suffix = context.text
+    context.prompt_junk_suffix = context_text(context)


@step(u'a suffix prompt')
def step_prompt_suffix(context):
-    context.prompt_suffix = context.text
+    context.prompt_suffix = context_text(context)


@step(u'{n_ga:d} group attention factor'
@@ -311,7 +311,7 @@ def step_impl(context, n_ga_w):

@step(u'a passkey prompt template')
def step_prompt_passkey(context):
-    context.prompt_passkey = context.text
+    context.prompt_passkey = context_text(context)


@step(u'{n_prompts:d} fixed prompts')
@@ -371,7 +371,7 @@ async def step_oai_chat_completions(context, api_error):

@step(u'a prompt')
def step_a_prompt(context):
-    context.prompts.append(context.text)
+    context.prompts.append(context_text(context))
    context.n_prompts = len(context.prompts)


@@ -464,7 +464,7 @@ async def all_prompts_are_predicted(context, expected_predicted_n=None):
@async_run_until_complete
async def step_compute_embedding(context):
    context.n_prompts = 1
-    context.embeddings = await request_embedding(context.text, base_url=context.base_url)
+    context.embeddings = await request_embedding(context_text(context), base_url=context.base_url)


@step(u'all embeddings are the same')
@@ -491,6 +491,7 @@ async def step_all_embeddings_are_the_same(context):
print(f"{msg}\n")
assert np.isclose(similarity, 1.0, rtol=1e-05, atol=1e-08, equal_nan=False), msg


@step(u'embeddings are generated')
def step_assert_embeddings(context):
assert context.n_prompts == len(context.embeddings), (f"unexpected response:\n"
@@ -504,7 +505,7 @@ def step_assert_embeddings(context):
@async_run_until_complete
async def step_oai_compute_embeddings(context):
    context.n_prompts = 1
-    context.embeddings = await request_oai_embeddings(context.text,
+    context.embeddings = await request_oai_embeddings(context_text(context),
                                                      base_url=context.base_url,
                                                      user_api_key=context.user_api_key,
                                                      model=context.model)
@@ -552,7 +553,7 @@ async def all_embeddings_are_generated(context):
@step(u'tokenizing')
@async_run_until_complete
async def step_tokenize(context):
-    context.tokenized_text = context.text
+    context.tokenized_text = context_text(context)
    async with aiohttp.ClientSession() as session:
        async with session.post(f'{context.base_url}/tokenize',
                                json={
@@ -1007,12 +1008,22 @@ async def completions_seed(context):
        else context.server_seed if hasattr(context, 'server_seed') else None


def context_text(context):
    return context.text.replace('\r', '')
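
This helper exists presumably because a Windows checkout can use CRLF line endings, in which case the docstrings behave exposes as context.text carry stray carriage returns that would leak into prompts and skew token-count expectations. A tiny illustration with made-up text:

# Illustration only (hypothetical input): context.text read from a CRLF
# checkout keeps '\r'; context_text() normalizes it away.
raw = 'Lorem ipsum\r\ndolor sit amet\r\n'
assert raw.replace('\r', '') == 'Lorem ipsum\ndolor sit amet\n'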


def start_server_background(context):
-    context.server_path = '../../../build/bin/server'
+    if os.name == 'nt':
+        context.server_path = '../../../build/bin/Release/server.exe'
+    else:
+        context.server_path = '../../../build/bin/server'
    if 'LLAMA_SERVER_BIN_PATH' in os.environ:
        context.server_path = os.environ['LLAMA_SERVER_BIN_PATH']
+    server_listen_addr = context.server_fqdn
+    if os.name == 'nt':
+        server_listen_addr = '0.0.0.0'
    server_args = [
-        '--host', context.server_fqdn,
+        '--host', server_listen_addr,
        '--port', context.server_port,
        '--model', context.model_file
    ]
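
On Windows the server now binds 0.0.0.0 (all interfaces) while the test client keeps connecting to context.server_fqdn, presumably to sidestep localhost binding quirks on the Windows runner. This works because a wildcard bind accepts connections addressed to loopback; a small self-contained illustration (hypothetical sockets, not PR code):

# Illustration: a socket bound to the wildcard address accepts connections
# made to 127.0.0.1, so the client side needs no change.
import socket

srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
srv.bind(('0.0.0.0', 0))     # wildcard bind on an ephemeral port
srv.listen(1)
port = srv.getsockname()[1]

cli = socket.create_connection(('127.0.0.1', port))  # connect via loopback
cli.close()
srv.close()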
@@ -1045,7 +1056,17 @@ def start_server_background(context):
    if 'SERVER_LOG_FORMAT_JSON' not in os.environ:
        server_args.extend(['--log-format', "text"])
    print(f"starting server with: {context.server_path} {server_args}\n")
+    flags = 0
+    if 'nt' == os.name:
+        flags |= 0x00000008  # DETACHED_PROCESS
+        flags |= 0x00000200  # CREATE_NEW_PROCESS_GROUP
+        flags |= 0x08000000  # CREATE_NO_WINDOW
+
+    pkwargs = {
+        'close_fds': True,  # close inherited fds (beyond the std streams) in the child
+        'creationflags': flags,
+    }
    context.server_process = subprocess.Popen(
        [str(arg) for arg in [context.server_path, *server_args]],
-        close_fds=True)
-    print(f"server pid={context.server_process.pid}")
+        **pkwargs)
+    print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")