
server: ci: windows build and tests #5968

Merged 10 commits on Mar 10, 2024
46 changes: 45 additions & 1 deletion .github/workflows/server.yml
@@ -47,6 +47,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Dependencies
id: depends
@@ -58,7 +60,6 @@
cmake \
python3-pip \
wget \
psmisc \
language-pack-en

- name: Build
@@ -90,3 +91,46 @@ jobs:
run: |
cd examples/server/tests
PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow


server-windows:
runs-on: windows-latest

steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Build
id: cmake_build
run: |
mkdir build
cd build
cmake .. -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release ;
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server

- name: Python setup
id: setup_python
uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Tests dependencies
id: test_dependencies
run: |
pip install -r examples/server/tests/requirements.txt

- name: Tests
id: server_integration_tests
run: |
cd examples/server/tests
behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp

- name: Slow tests
id: server_integration_tests_slow
if: ${{ github.event.schedule != '' || github.event.inputs.slow_tests == 'true' }}
run: |
cd examples/server/tests
behave.exe --stop --no-skipped --no-capture --tags slow
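
Note: the new Windows job drives the same behave suite as the Linux job, just invoked directly instead of through tests.sh. As a hedged sketch, the "Tests" step could be reproduced locally roughly like this (assumes behave is installed via requirements.txt and the server binary is already built; the script itself is illustrative, not part of the PR):

```python
# Sketch: rerun the CI "Tests" step locally. The flags mirror the
# workflow step above; behave must be on PATH.
import subprocess
import sys

result = subprocess.run(
    ["behave", "--summary", "--stop", "--no-capture",
     "--exclude", "issues|wrong_usages|passkey",
     "--tags", "llama.cpp"],
    cwd="examples/server/tests",  # same working directory as the workflow
)
sys.exit(result.returncode)
```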
66 changes: 47 additions & 19 deletions examples/server/tests/features/environment.py
@@ -1,9 +1,10 @@
import errno
import os
import socket
import subprocess
import time
from contextlib import closing
from signal import SIGKILL
import signal


def before_scenario(context, scenario):
@@ -29,44 +30,71 @@ def after_scenario(context, scenario):
for line in f:
print(line)
if not is_server_listening(context.server_fqdn, context.server_port):
print("\x1b[33;101mERROR: Server stopped listening\x1b[0m")
print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n")

if not pid_exists(context.server_process.pid):
assert False, f"Server not running pid={context.server_process.pid} ..."

print(f"stopping server pid={context.server_process.pid} ...")
context.server_process.kill()
server_graceful_shutdown(context)

# Wait briefly for the socket to free up
time.sleep(0.05)

attempts = 0
while is_server_listening(context.server_fqdn, context.server_port):
print(f"stopping server pid={context.server_process.pid} ...")
os.kill(context.server_process.pid, SIGKILL)
while pid_exists(context.server_process.pid) or is_server_listening(context.server_fqdn, context.server_port):
server_kill(context)
time.sleep(0.1)
attempts += 1
if attempts > 5:
print(f"Server dangling exits, killing all {context.server_path} ...")
process = subprocess.run(['killall', '-9', context.server_path],
stderr=subprocess.PIPE,
universal_newlines=True)
print(process)
server_kill_hard(context)


def server_graceful_shutdown(context):
print(f"shutting down server pid={context.server_process.pid} ...\n")
if os.name == 'nt':
os.kill(context.server_process.pid, signal.CTRL_C_EVENT)
else:
os.kill(context.server_process.pid, signal.SIGINT)


def server_kill(context):
print(f"killing server pid={context.server_process.pid} ...\n")
context.server_process.kill()


def server_kill_hard(context):
pid = context.server_process.pid
path = context.server_path

print(f"Server dangling exits, hard killing force {pid}={path}...\n")
if os.name == 'nt':
process = subprocess.check_output(['taskkill', '/F', '/pid', str(pid)]).decode()
print(process)
else:
os.kill(-pid, signal.SIGKILL)


def is_server_listening(server_fqdn, server_port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
result = sock.connect_ex((server_fqdn, server_port))
return result == 0
_is_server_listening = result == 0
if _is_server_listening:
print(f"server is listening on {server_fqdn}:{server_port}...\n")
return _is_server_listening


def pid_exists(pid):
"""Check whether pid exists in the current process table."""
import errno
if pid < 0:
return False
try:
os.kill(pid, 0)
except OSError as e:
return e.errno == errno.EPERM
if os.name == 'nt':
output = subprocess.check_output(['TASKLIST', '/FI', f'pid eq {pid}']).decode()
print(output)
return "No tasks are running" not in output
else:
return True
try:
os.kill(pid, 0)
except OSError as e:
return e.errno == errno.EPERM
else:
return True
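
One Windows subtlety behind `server_graceful_shutdown` above: `CTRL_C_EVENT` is delivered to an entire console process group, so it only reaches the server if the test runner and the server share a console. A common alternative is to start the child in its own process group and send `CTRL_BREAK_EVENT` instead. A minimal cross-platform sketch of that pattern (illustrative only, not the suite's actual launch code):

```python
# Sketch: spawn a child so it can later be interrupted cleanly on both
# POSIX and Windows. CREATE_NEW_PROCESS_GROUP makes CTRL_BREAK_EVENT
# target only this child instead of the whole console.
import os
import signal
import subprocess
import sys

def spawn(cmd):
    if os.name == 'nt':
        return subprocess.Popen(
            cmd, creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
    return subprocess.Popen(cmd)

def graceful_shutdown(proc):
    if os.name == 'nt':
        proc.send_signal(signal.CTRL_BREAK_EVENT)  # console control event
    else:
        proc.send_signal(signal.SIGINT)            # same as Ctrl-C

if __name__ == "__main__":
    child = spawn([sys.executable, "-c", "import time; time.sleep(60)"])
    graceful_shutdown(child)
    child.wait(timeout=10)
    print(f"child exited with {child.returncode}")
```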
2 changes: 1 addition & 1 deletion examples/server/tests/features/server.feature
@@ -47,7 +47,7 @@ Feature: llama.cpp server
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
"""
And a completion request with no api error
Then 64 tokens are predicted matching fun|Annaks|popcorns
Then 64 tokens are predicted matching fun|Annaks|popcorns|pictry
And the completion is truncated
And 109 prompt tokens are processed

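
For context, the "64 tokens are predicted matching ..." step in the feature above takes a regex alternation, so the scenario passes if any one of the listed strings appears in the completion. A hypothetical sketch of that check (the helper name is illustrative, not the suite's actual step implementation):

```python
# Sketch: how a "tokens are predicted matching <pattern>" assertion
# could reduce to a regex search over the generated text.
import re

def assert_completion_matches(completion_text, pattern):
    assert re.search(pattern, completion_text), \
        f"no match for /{pattern}/ in: {completion_text!r}"

assert_completion_matches(
    "so much fun at the fair",         # toy completion
    "fun|Annaks|popcorns|pictry",      # alternation from the feature file
)
```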