[0.1.6] new build release

ombhojane · Sep 26, 2024 · 16f7730 · 16f7730
1 parent c734b03
commit 16f7730
Show file tree

Hide file tree

Showing 11 changed files with 792 additions and 14 deletions.
diff --git a/build/lib/explainableai/core.py b/build/lib/explainableai/core.py
@@ -17,9 +17,11 @@
 from .feature_interaction import analyze_feature_interactions
 from .llm_explanations import initialize_gemini, get_llm_explanation, get_prediction_explanation
 from .report_generator import ReportGenerator
+from .model_selection import compare_models
 from reportlab.platypus import PageBreak
 
 
+
 class XAIWrapper:
     def __init__(self):
         self.model = None
@@ -35,19 +37,41 @@ def __init__(self):
         self.feature_importance = None
         self.results = None  # Add this line to store analysis results
 
-    def fit(self, model, X, y, feature_names=None):
-        self.model = model
+    def fit(self, models, X, y, feature_names=None):
+        if isinstance(models, dict):
+            self.models = models
+        else:
+            self.models = {'Model': models}
         self.X = X
         self.y = y
         self.feature_names = feature_names if feature_names is not None else X.columns.tolist()
-        self.is_classifier = hasattr(model, "predict_proba")
+        self.is_classifier = all(hasattr(model, "predict_proba") for model in self.models.values())
 
         print("Preprocessing data...")
         self._preprocess_data()
 
-        print("Fitting model and analyzing...")
+        print("Fitting models and analyzing...")
+        self.model_comparison_results = self._compare_models()
+
+        # Select the best model based on cv_score
+        best_model_name = max(self.model_comparison_results, key=lambda x: self.model_comparison_results[x]['cv_score'])
+        self.model = self.models[best_model_name]
         self.model.fit(self.X, self.y)
+
         return self
+
+    def _compare_models(self):
+        from sklearn.model_selection import cross_val_score
+        results = {}
+        for name, model in self.models.items():
+            cv_scores = cross_val_score(model, self.X, self.y, cv=5, scoring='roc_auc' if self.is_classifier else 'r2')
+            model.fit(self.X, self.y)
+            test_score = model.score(self.X, self.y)
+            results[name] = {
+                'cv_score': cv_scores.mean(),
+                'test_score': test_score
+            }
+        return results
 
     def _preprocess_data(self):
         # Identify categorical and numerical columns
@@ -106,12 +130,15 @@ def analyze(self):
         mean_score, std_score = cross_validate(self.model, self.X, self.y)
         results['cv_scores'] = (mean_score, std_score)
 
+        print("Model comparison results:")
+        results['model_comparison'] = self.model_comparison_results
+
         self._print_results(results)
 
         print("Generating LLM explanation...")
         results['llm_explanation'] = get_llm_explanation(self.gemini_model, results)
 
-        self.results = results  # Store the results in the instance
+        self.results = results
         return results
 
     def generate_report(self, filename='xai_report.pdf'):
@@ -121,6 +148,13 @@ def generate_report(self, filename='xai_report.pdf'):
         report = ReportGenerator(filename)
         report.add_heading("Explainable AI Report")
 
+        report.add_heading("Model Comparison", level=2)
+        model_comparison_data = [["Model", "CV Score", "Test Score"]]
+        for model, scores in self.results['model_comparison'].items():
+            model_comparison_data.append([model, f"{scores['cv_score']:.4f}", f"{scores['test_score']:.4f}"])
+        report.add_table(model_comparison_data)
+
+
         # Model Performance
         report.add_heading("Model Performance", level=2)
         for metric, value in self.results['model_performance'].items():

diff --git a/build/lib/explainableai/model_selection.py b/build/lib/explainableai/model_selection.py
@@ -1,17 +1,24 @@
 # model_selection.py
 from sklearn.linear_model import LogisticRegression
 from sklearn.ensemble import RandomForestClassifier
-from sklearn.svm import SVC
+from xgboost import XGBClassifier
+from sklearn.neural_network import MLPClassifier
 from sklearn.model_selection import cross_val_score
 from sklearn.metrics import roc_curve, auc
 import matplotlib.pyplot as plt
+import numpy as np
 
-def compare_models(X_train, y_train, X_test, y_test):
-    models = {
-        'Logistic Regression': LogisticRegression(),
-        'Random Forest': RandomForestClassifier(),
-        'SVM': SVC(probability=True)
+def get_default_models():
+    return {
+        'Logistic Regression': LogisticRegression(max_iter=1000),
+        'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
+        'XGBoost': XGBClassifier(n_estimators=100, random_state=42),
+        'Neural Network': MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=1000, random_state=42)
     }
+
+def compare_models(X_train, y_train, X_test, y_test, models=None):
+    if models is None:
+        models = get_default_models()
 
     results = {}
     for name, model in models.items():
@@ -42,5 +49,5 @@ def plot_roc_curves(models, X_test, y_test):
     plt.ylabel('True Positive Rate')
     plt.title('Receiver Operating Characteristic (ROC) Curve')
     plt.legend(loc="lower right")
-    plt.show()
+    plt.savefig('model_comparison_roc_curves.png')
     plt.close()
diff --git a/dist/explainableai-0.1.6-py3-none-any.whl b/dist/explainableai-0.1.6-py3-none-any.whl
diff --git a/dist/explainableai-0.1.6.tar.gz b/dist/explainableai-0.1.6.tar.gz
diff --git a/explainableai.egg-info/PKG-INFO b/explainableai.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: explainableai
-Version: 0.1.5
+Version: 0.1.6
 Summary: A comprehensive package for Explainable AI and model interpretation
 Home-page: https://github.com/ombhojane/explainableai
 Author: Om Bhojane, Palak Boricha
@@ -22,6 +22,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
+License-File: LICENSE.md
 Requires-Dist: numpy
 Requires-Dist: pandas
 Requires-Dist: scikit-learn

diff --git a/explainableai.egg-info/SOURCES.txt b/explainableai.egg-info/SOURCES.txt
@@ -1,3 +1,4 @@
+LICENSE.md
 README.md
 setup.py
 explainableai/__init__.py

diff --git a/explainableai/__pycache__/core.cpython-311.pyc b/explainableai/__pycache__/core.cpython-311.pyc
diff --git a/explainableai/__pycache__/llm_explanations.cpython-311.pyc b/explainableai/__pycache__/llm_explanations.cpython-311.pyc
diff --git a/explainableai/__pycache__/model_selection.cpython-311.pyc b/explainableai/__pycache__/model_selection.cpython-311.pyc