Skip to content

Commit

Permalink
Merge pull request #321 from ritzzcodes/patch-1
Browse files Browse the repository at this point in the history
 SuicidalDetection.py enhancement #issue 274
  • Loading branch information
Atharv714 authored Jul 24, 2024
2 parents 9798a99 + 910c9df commit cead717
Showing 1 changed file with 57 additions and 0 deletions.
57 changes: 57 additions & 0 deletions Suicide & Depression Detection/Models/SuicidalDetection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset (path is the Kaggle notebook input location)
df = pd.read_csv('/kaggle/input/suicide-watch/Suicide_Detection.csv')

# Check the column names
print(df.columns)

# The pipeline needs a 'text' column (documents) and a 'class' column (labels).
# Fail early with an explicit message instead of an opaque lookup error below.
required_columns = ('text', 'class')
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(f"Dataset is missing required column(s): {missing_columns}")

# read_csv parses empty cells (and tokens such as "NA" or "null") as NaN, and
# TfidfVectorizer raises on NaN documents, so drop incomplete rows up front.
# When the data has no missing values this leaves the frame unchanged.
df = df.dropna(subset=list(required_columns))

X = df['text']
y = df['class']

# Split the data into training and testing sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a TF-IDF vectorizer that ignores common English stop words
vectorizer = TfidfVectorizer(stop_words='english')

# Fit the vocabulary/IDF weights on the training data only, then apply the same
# transformation to the test data so no test information leaks into the features
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train a Logistic Regression model
lr_model = LogisticRegression()
lr_model.fit(X_train_tfidf, y_train)

# Evaluate the model on the testing data
y_pred = lr_model.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Plot the confusion matrix
# Pin the label order explicitly so the tick labels are guaranteed to line up
# with the matrix rows/columns (sorted unique labels, same as the default order).
class_labels = sorted(set(y_test) | set(y_pred))
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

plt.figure(figsize=(10, 8))
# fmt='d' prints the integer counts in full; the default '.2g' format would
# render large counts in scientific notation (e.g. 2.3e+04).
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()

# Plot the classification report
# output_dict=True yields one column per class plus the aggregate entries
# (accuracy, macro avg, weighted avg), with precision/recall/f1-score/support
# as rows. 'support' is a row count rather than a 0-1 score, so it is dropped
# to keep the colour scale meaningful; the frame is then transposed so that
# each class/aggregate is a row and each metric is a column.
report = classification_report(y_test, y_pred, output_dict=True)
report_df = pd.DataFrame(report).drop(index='support').T

plt.figure(figsize=(10, 8))
sns.heatmap(report_df, annot=True, cmap='Blues')
plt.xlabel('Metrics')
plt.ylabel('Classes')
plt.title('Classification Report')
plt.show()

0 comments on commit cead717

Please sign in to comment.