optimize(utils): device selection & jit
fumiama committed Oct 10, 2024
1 parent a1aebd4 commit b3a180e
Showing 3 changed files with 15 additions and 30 deletions.
8 changes: 4 additions & 4 deletions ChatTTS/norm.py
@@ -10,15 +10,15 @@
 from .utils import del_all


-@jit
+@jit(nopython=True)
 def _find_index(table: np.ndarray, val: np.uint16):
     for i in range(table.size):
         if table[i] == val:
             return i
     return -1


-@jit
+@jit(nopython=True)
 def _fast_replace(
     table: np.ndarray, text: bytes
 ) -> Tuple[np.ndarray, List[Tuple[str, str]]]:
@@ -34,7 +34,7 @@ def _fast_replace(
     return result, replaced_words


-@jit
+@jit(nopython=True)
 def _split_tags(text: str) -> Tuple[List[str], List[str]]:
     texts: List[str] = []
     tags: List[str] = []
@@ -57,7 +57,7 @@ def _split_tags(text: str) -> Tuple[List[str], List[str]]:
     return texts, tags


-@jit
+@jit(nopython=True)
 def _combine_tags(texts: List[str], tags: List[str]) -> str:
     text = ""
     for t in texts:
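
Note (not part of this commit): jit(nopython=True) asks numba to compile the function fully to machine code and to raise an error rather than silently fall back to the much slower object mode that a bare @jit historically permitted. A minimal standalone sketch of the pattern, using an illustrative first_index helper rather than code from this repo:

import numpy as np
from numba import jit

# Illustrative example only; mirrors the linear-scan shape of _find_index above.
@jit(nopython=True)
def first_index(table: np.ndarray, val: np.uint16) -> int:
    for i in range(table.size):
        if table[i] == val:
            return i
    return -1

table = np.array([3, 7, 42], dtype=np.uint16)
print(first_index(table, np.uint16(42)))  # -> 2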
35 changes: 10 additions & 25 deletions ChatTTS/utils/gpu.py
@@ -9,43 +9,28 @@


 def select_device(min_memory=2047, experimental=False):
-    if torch.cuda.is_available():
-        selected_gpu = 0
-        max_free_memory = -1
-        for i in range(torch.cuda.device_count()):
-            props = torch.cuda.get_device_properties(i)
-            free_memory = props.total_memory - torch.cuda.memory_reserved(i)
-            if max_free_memory < free_memory:
-                selected_gpu = i
-                max_free_memory = free_memory
-        free_memory_mb = max_free_memory / (1024 * 1024)
-        if free_memory_mb < min_memory:
-            logger.get_logger().warning(
-                f"GPU {selected_gpu} has {round(free_memory_mb, 2)} MB memory left. Switching to CPU."
-            )
-            device = torch.device("cpu")
-        else:
-            device = torch.device(f"cuda:{selected_gpu}")
-    elif _is_torch_npu_available():
+    has_cuda = torch.cuda.is_available()
+    if has_cuda or _is_torch_npu_available():
+        provider = torch.cuda if has_cuda else torch.npu
         """
         Using Ascend NPU to accelerate the process of inferencing when GPU is not found.
         """
-        selected_npu = 0
+        dev_idx = 0
         max_free_memory = -1
-        for i in range(torch.npu.device_count()):
-            props = torch.npu.get_device_properties(i)
-            free_memory = props.total_memory - torch.npu.memory_reserved(i)
+        for i in range(provider.device_count()):
+            props = provider.get_device_properties(i)
+            free_memory = props.total_memory - provider.memory_reserved(i)
             if max_free_memory < free_memory:
-                selected_npu = i
+                dev_idx = i
                 max_free_memory = free_memory
         free_memory_mb = max_free_memory / (1024 * 1024)
         if free_memory_mb < min_memory:
             logger.get_logger().warning(
-                f"NPU {selected_npu} has {round(free_memory_mb, 2)} MB memory left. Switching to CPU."
+                f"{provider.device(dev_idx)} has {round(free_memory_mb, 2)} MB memory left. Switching to CPU."
             )
             device = torch.device("cpu")
         else:
-            device = torch.device(f"npu:{selected_npu}")
+            device = provider.device(dev_idx)
     elif torch.backends.mps.is_available():
         """
         Currently MPS is slower than CPU while needs more memory and core utility,
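
Note (not part of this commit): the refactor works because torch.cuda and torch.npu (provided by torch_npu on Ascend systems) expose the same memory-query surface (device_count, get_device_properties, memory_reserved), so a single provider alias can rank devices for either backend. A standalone sketch of the idea, with illustrative names (pick_accelerator) not taken from this repo:

import torch

def pick_accelerator() -> torch.device:
    # Prefer CUDA; fall back to NPU if torch_npu has registered torch.npu.
    has_cuda = torch.cuda.is_available()
    provider = torch.cuda if has_cuda else getattr(torch, "npu", None)
    if provider is None or not provider.is_available():
        return torch.device("cpu")
    best, best_free = 0, -1
    for i in range(provider.device_count()):
        # Both backends report total and reserved memory the same way.
        free = provider.get_device_properties(i).total_memory - provider.memory_reserved(i)
        if free > best_free:
            best, best_free = i, free
    return torch.device(f"{'cuda' if has_cuda else 'npu'}:{best}")

print(pick_accelerator())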
2 changes: 1 addition & 1 deletion tools/audio/np.py
@@ -4,7 +4,7 @@
 from numba import jit


-@jit
+@jit(nopython=True)
 def float_to_int16(audio: np.ndarray) -> np.ndarray:
     am = int(math.ceil(float(np.abs(audio).max())) * 32768)
     am = 32767 * 32768 // am
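
Note (not part of this commit): float_to_int16 rescales float audio so its peak lands near the int16 limit. A hedged usage sketch, assuming the repo root is on the Python path and that the function returns int16 samples ready for WAV output:

import numpy as np
from tools.audio.np import float_to_int16

# One second of a 440 Hz tone at 24 kHz, float32 in [-1, 1].
sr = 24000
wav_float = 0.5 * np.sin(2 * np.pi * 440 * np.arange(sr) / sr).astype(np.float32)
wav_int16 = float_to_int16(wav_float)
print(wav_int16.dtype, int(np.abs(wav_int16).max()))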