|
2 | 2 | import os |
3 | 3 | import time |
4 | 4 | import subprocess |
5 | | -from collections import Counter |
6 | | - |
7 | | -import requests |
8 | | - |
9 | 5 | from noisereduce import reduce_noise |
10 | 6 | from scipy.io import wavfile |
11 | 7 | from telegram import Update |
|
20 | 16 | from utilities.wrapper import send_text |
21 | 17 | from databases.db import push_user_survey_progress, init_user, get_user_audio |
22 | 18 |
|
23 | | -from env_config import (DEBUG_MODE, |
24 | | - DEBUG_ON, DEBUG_OFF, TOKEN) |
| 19 | +from env_config import (DEBUG_MODE, DEBUG_ON) |
25 | 20 | from kafka.kafka_producer import produce_message |
26 | 21 |
|
27 | 22 |
|
28 | | -def split_audio(wav_filename, unique_file_id, min_chunk_length=30000, max_chunk_length=40000, silence_thresh=-40, min_silence_len=500): |
29 | | - audio = AudioSegment.from_wav(wav_filename) |
30 | | - chunk_dir_name = os.path.join('emotion_recognition', 'input_files') |
31 | | - if not os.path.exists(chunk_dir_name): |
32 | | - os.makedirs(chunk_dir_name) |
33 | | - chunk_filenames = [] |
34 | | - chunk_start_times = [] |
35 | | - |
36 | | - if len(audio) <= min_chunk_length: |
37 | | - chunk_filename = os.path.join(chunk_dir_name, unique_file_id + "_chunk_0.wav") |
38 | | - audio.export(chunk_filename, format="wav") |
39 | | - return [chunk_filename], [0] |
40 | | - |
def get_silence_points(audio, min_silence_len, silence_thresh):
    """Return the midpoint of every silent range detected in *audio*.

    Thin wrapper around pydub's ``detect_silence``; each returned value is
    the float midpoint ``(start + end) / 2`` of one silent range, so callers
    must truncate/round before slicing the audio segment.
    """
    ranges = detect_silence(
        audio,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh,
    )
    midpoints = []
    for range_start, range_end in ranges:
        midpoints.append((range_start + range_end) / 2)
    return midpoints
43 | 26 |
|
| 27 | + |
def generate_chunks(audio, silence_points, min_len, max_len):
    """Cut *audio* into chunks bounded by silence points.

    Chunks preferably end at a silence midpoint; spans longer than
    ``max_len`` are carved into ``max_len``-sized pieces. Spans shorter
    than ``min_len`` are merged into the following chunk.

    Bug fixed vs. the previous version: ``max_len`` was only enforced once
    per silence point and never on the trailing segment, so chunks longer
    than ``max_len`` could slip through. Lengths are presumably
    milliseconds (pydub's time unit) — units only matter to callers.

    Args:
        audio: sliceable sequence (e.g. a pydub AudioSegment).
        silence_points: ascending positions (may be floats) to cut at.
        min_len: minimal desired chunk length.
        max_len: maximal allowed chunk length (now strictly enforced).

    Returns:
        (chunks, starts): the chunk slices and each chunk's start offset.
    """
    chunks = []
    starts = []
    start = 0

    for silence in silence_points:
        silence = int(silence)
        # Carve off max_len-sized pieces while the span up to the next
        # silence point is still too long.
        while silence - start > max_len:
            split_point = start + max_len
            chunks.append(audio[start:split_point])
            starts.append(start)
            start = split_point
        if min_len <= silence - start:
            chunks.append(audio[start:silence])
            starts.append(start)
            start = silence

    # Enforce max_len on the trailing segment as well.
    while len(audio) - start > max_len:
        split_point = start + max_len
        chunks.append(audio[start:split_point])
        starts.append(start)
        start = split_point
    if start < len(audio):
        chunks.append(audio[start:])
        starts.append(start)

    return chunks, starts
| 50 | + |
62 | 51 |
|
def export_chunks(chunks, base_dir, file_id):
    """Persist every chunk as a wav file and return the created paths.

    Files are named ``<file_id>_chunk_<index>.wav`` inside *base_dir*,
    indexed in chunk order.
    """
    exported = []
    for index, segment in enumerate(chunks):
        target = os.path.join(base_dir, f"{file_id}_chunk_{index}.wav")
        segment.export(target, format="wav")
        exported.append(target)
    return exported
67 | 59 |
|
68 | | - return chunk_filenames, chunk_start_times |
| 60 | + |
def split_audio(wav_filename, unique_file_id, min_chunk_length=30000, max_chunk_length=40000, silence_thresh=-40,
                min_silence_len=500):
    """Split a wav file into chunks at silence points and export them.

    Args:
        wav_filename: path of the source wav file.
        unique_file_id: prefix used for the exported chunk filenames.
        min_chunk_length: minimal desired chunk length (presumably ms,
            pydub's time unit — defaults suggest ~30 s).
        max_chunk_length: maximal desired chunk length.
        silence_thresh: silence threshold passed to silence detection.
        min_silence_len: minimal silence duration that counts as a cut point.

    Returns:
        (filenames, start_times): exported chunk paths and each chunk's
        start offset within the original recording.
    """
    audio = AudioSegment.from_wav(wav_filename)
    chunk_dir = os.path.join('emotion_recognition', 'input_files')
    # exist_ok avoids the check-then-create race of the previous
    # os.path.exists() / os.makedirs() pair.
    os.makedirs(chunk_dir, exist_ok=True)

    # Short recordings become a single chunk starting at offset 0.
    if len(audio) <= min_chunk_length:
        filename = os.path.join(chunk_dir, f"{unique_file_id}_chunk_0.wav")
        audio.export(filename, format="wav")
        return [filename], [0]

    silence_points = get_silence_points(audio, min_silence_len, silence_thresh)
    chunks, start_times = generate_chunks(audio, silence_points, min_chunk_length, max_chunk_length)
    filenames = export_chunks(chunks, chunk_dir, unique_file_id)

    return filenames, start_times
69 | 78 |
|
70 | 79 |
|
71 | 80 | def download_voice(update: Update): |
@@ -121,46 +130,63 @@ def work_with_audio(update: Update, context: CallbackContext): |
121 | 130 | produce_message('stt', json.dumps(message)) |
122 | 131 |
|
123 | 132 |
|
124 | | -def audio_to_text(filename, ogg_filename, chunk_filenames, chunk_start_times, update_id, user): |
125 | | - processing_start_time = time.time() |
126 | | - input_sentence, stats_sentence = "", "" |
127 | | - emotions = Counter() |
128 | | - audio_emotions_statistics = [] |
def process_chunk(chunk_filename, start_time):
    """Transcribe one audio chunk and pair it with its detected emotion.

    Sends the chunk to the speech-recognition service and, on success,
    associates the recognized text with an emotion/word pair.

    Returns:
        dict with keys ``text``, ``stats``, ``emotion``, ``word``,
        ``start_time`` and ``filename``, or ``None`` when the service does
        not answer with HTTP 200.
    """
    response = get_att_whisper(chunk_filename)

    if response.status_code == 200:
        sentence = RecognizedSentence(response.json())
        base_name = os.path.basename(chunk_filename)
        word, emotion = associate_words_with_emotions(base_name, sentence.get_text())
        return {
            "text": sentence.get_text(),
            "stats": sentence.generate_stats(),
            "emotion": emotion,
            "word": word,
            "start_time": start_time,
            "filename": chunk_filename,
        }

    return None
|
143 | | - audio_emotions_statistics.append({"filename": chunk_filename, "emotion": emotion, "word": word, "text": text, "start_time": start_time}) |
def audio_to_text(filename, ogg_filename, chunk_filenames, chunk_start_times, update_id, user):
    """Transcribe all chunks of a voice message and persist the results.

    Processes the chunks in order, accumulating recognized text, per-chunk
    stats and emotion records, then stores everything via
    ``push_user_survey_progress`` and removes the temporary ogg file.
    Aborts silently (without cleanup) if any chunk fails recognition —
    mirroring the previous behavior.

    Args:
        filename: wav filename (unused here; kept for interface compatibility).
        ogg_filename: original ogg file; uploaded with the answer, then deleted.
        chunk_filenames: wav chunk paths produced by split_audio().
        chunk_start_times: start offset of each chunk within the recording.
        update_id: telegram update identifier for the survey record.
        user: telegram user the message belongs to.
    """
    processing_started = time.time()
    full_text = []
    stats_blocks = []
    emotion_stats = []

    for chunk_filename, chunk_start in zip(chunk_filenames, chunk_start_times):
        result = process_chunk(chunk_filename, chunk_start)
        if not result:
            # Recognition failed for this chunk; abort without persisting.
            return

        full_text.append(result["text"])
        stats_blocks.append(result["stats"])
        emotion_stats.append({
            "filename": result["filename"],
            "emotion": result["emotion"],
            "word": result["word"],
            "text": result["text"],
            "start_time": result["start_time"]
        })

    if DEBUG_MODE == DEBUG_ON:
        elapsed = time.time() - processing_started
        send_text(user.id, f"Processing time: {elapsed:.2f} seconds")

    # Close the upload handle deterministically instead of leaking it (the
    # old code relied on GC). Assumes push_user_survey_progress consumes the
    # handle during the call — TODO confirm it does not store it for later.
    with open(ogg_filename, 'rb') as audio_file:
        push_user_survey_progress(
            user=user,
            focus=init_user(user).get_last_focus(),
            id_=update_id,
            user_answer="".join(full_text),
            stats="\n".join(stats_blocks),
            audio_file=audio_file,
            audio_emotions_statistics=emotion_stats
        )

    os.remove(ogg_filename)

    if DEBUG_MODE == DEBUG_ON:
        print(get_user_audio(user))
        send_text(user.id, "ID записи с твоим аудиосообщением в базе данных: " +
                  str(json.loads(json_util.dumps(get_user_audio(user)))))
0 commit comments