diff --git a/Machine_Learning_Algorithms/Naive Bayes Classifier Algorithm/Program.c b/Machine_Learning_Algorithms/Naive Bayes Classifier Algorithm/Program.c new file mode 100644 index 00000000..82bf92a6 --- /dev/null +++ b/Machine_Learning_Algorithms/Naive Bayes Classifier Algorithm/Program.c @@ -0,0 +1,90 @@ +#include +#include +#include + +#define NUM_CLASSES 2 // Spam and Not Spam +#define NUM_FEATURES 5 // Number of features (words) +#define NUM_SAMPLES 6 // Number of training samples + +// Sample data: Each row represents a document with features indicating presence of words (1) or absence (0) +int data[NUM_SAMPLES][NUM_FEATURES] = { + {1, 1, 0, 1, 0}, // Spam + {1, 0, 1, 1, 1}, // Spam + {0, 1, 1, 0, 0}, // Not Spam + {1, 1, 1, 0, 0}, // Not Spam + {0, 0, 1, 1, 1}, // Not Spam + {1, 0, 0, 0, 1} // Spam +}; + +// Labels for the training data (0 = Not Spam, 1 = Spam) +int labels[NUM_SAMPLES] = {1, 1, 0, 0, 0, 1}; + +// Function to calculate prior probabilities +void calculate_prior(double *prior) { + for (int i = 0; i < NUM_CLASSES; i++) { + prior[i] = 0; + } + + for (int i = 0; i < NUM_SAMPLES; i++) { + prior[labels[i]]++; + } + + for (int i = 0; i < NUM_CLASSES; i++) { + prior[i] /= NUM_SAMPLES; // Normalize + } +} + +// Function to calculate likelihoods +void calculate_likelihood(double likelihood[NUM_CLASSES][NUM_FEATURES]) { + for (int i = 0; i < NUM_CLASSES; i++) { + for (int j = 0; j < NUM_FEATURES; j++) { + likelihood[i][j] = 1; // Use Laplace smoothing + } + } + + for (int i = 0; i < NUM_SAMPLES; i++) { + for (int j = 0; j < NUM_FEATURES; j++) { + likelihood[labels[i]][j] += data[i][j]; + } + } + + for (int i = 0; i < NUM_CLASSES; i++) { + for (int j = 0; j < NUM_FEATURES; j++) { + likelihood[i][j] /= (prior[i] * NUM_SAMPLES + 2); // Smooth with total features + } + } +} + +// Function to predict the class for a given instance +int predict(int instance[NUM_FEATURES], double prior[NUM_CLASSES], double likelihood[NUM_CLASSES][NUM_FEATURES]) { + double 
posteriors[NUM_CLASSES] = {0}; + + for (int i = 0; i < NUM_CLASSES; i++) { + posteriors[i] = prior[i]; + for (int j = 0; j < NUM_FEATURES; j++) { + posteriors[i] *= likelihood[i][j] * instance[j]; + } + } + + return (posteriors[1] > posteriors[0]) ? 1 : 0; // Return class with higher probability +} + +int main() { + double prior[NUM_CLASSES]; + double likelihood[NUM_CLASSES][NUM_FEATURES]; + + // Calculate prior probabilities and likelihoods + calculate_prior(prior); + calculate_likelihood(likelihood); + + // Test instance (example: a new document) + int test_instance[NUM_FEATURES] = {1, 0, 1, 1, 0}; // New document to classify + + // Predict class for the test instance + int predicted_class = predict(test_instance, prior, likelihood); + + // Output the result + printf("Predicted class: %s\n", (predicted_class == 1) ? "Spam" : "Not Spam"); + + return 0; +} diff --git a/Machine_Learning_Algorithms/Naive Bayes Classifier Algorithm/README.md b/Machine_Learning_Algorithms/Naive Bayes Classifier Algorithm/README.md new file mode 100644 index 00000000..9e2b2360 --- /dev/null +++ b/Machine_Learning_Algorithms/Naive Bayes Classifier Algorithm/README.md @@ -0,0 +1,50 @@ +# Naive Bayes Classifier Algorithm in Machine Learning + +## Description +The Naive Bayes classifier is a probabilistic machine learning algorithm based on Bayes' Theorem, which is used for classification tasks. It assumes that the presence of a particular feature in a class is independent of the presence of any other feature. This assumption of conditional independence simplifies the computations involved in predicting the class of a given instance. + +## Key Features +- **Simplicity**: The algorithm is easy to understand and implement, making it suitable for beginners in machine learning. +- **Speed**: Naive Bayes is highly efficient and works well with large datasets due to its linear time complexity. 
+- **Scalability**: It performs well in multi-class problems, and it can handle a large number of features. +- **Robustness**: The algorithm can handle missing data and is less sensitive to irrelevant features. +- **Probabilistic Output**: Naive Bayes provides a probabilistic output, giving users an idea of the confidence level of predictions. + +## Problem Definition +The Naive Bayes classifier is typically used for problems where the goal is to classify an input into one of several predefined categories based on input features. Common examples include: +- **Text Classification**: Email spam detection, sentiment analysis, and document categorization. +- **Medical Diagnosis**: Predicting diseases based on symptoms. +- **Recommendation Systems**: Classifying items based on user preferences. + +## Algorithm Review +The Naive Bayes classifier works based on Bayes' Theorem, which states: + +\[ +P(C|X) = \frac{P(X|C) \cdot P(C)}{P(X)} +\] + +Where: +- \(P(C|X)\) is the posterior probability of class \(C\) given feature set \(X\). +- \(P(X|C)\) is the likelihood of feature set \(X\) given class \(C\). +- \(P(C)\) is the prior probability of class \(C\). +- \(P(X)\) is the prior probability of feature set \(X\). + +### Steps of the Algorithm: +1. **Data Preparation**: Collect and preprocess the data (e.g., feature extraction). +2. **Calculate Prior Probabilities**: Determine the probability of each class in the dataset. +3. **Calculate Likelihoods**: For each feature, calculate the likelihood of that feature occurring given each class. +4. **Make Predictions**: For a new instance, use Bayes' theorem to calculate the posterior probabilities for each class and assign the instance to the class with the highest probability. + +## Time Complexity +The time complexity of the Naive Bayes classifier is: +- **Training Phase**: \(O(n \cdot f)\), where \(n\) is the number of instances and \(f\) is the number of features. 
+- **Prediction Phase**: \(O(c \cdot f)\) for each instance, where \(c\) is the number of classes, as it requires evaluating the likelihood of every feature under every class.
+
+## Applications
+- **Text Classification**: Effective in categorizing text into predefined classes, such as spam detection in emails or sentiment analysis in social media.
+- **Medical Diagnosis**: Assisting in diagnosing diseases based on symptoms and patient history.
+- **Recommendation Systems**: Classifying items to recommend to users based on their past behaviors.
+- **Real-time Predictions**: Used in applications requiring quick predictions, such as fraud detection in financial transactions.
+
+## Conclusion
+The Naive Bayes classifier is a powerful yet simple algorithm in machine learning that excels in text classification and many other applications. Its independence assumption makes it efficient for large datasets, although this assumption may not hold true in every case. Despite its simplicity, Naive Bayes often performs surprisingly well, making it a go-to choice for many classification tasks, especially when computational resources are limited or speed is essential. Overall, it is a fundamental algorithm that provides a solid foundation for understanding more complex classification methods.