From 4aa1248f5a049640775c5ab64502a730fa961f8a Mon Sep 17 00:00:00 2001 From: Anneshu Nag <132702983+NK-Works@users.noreply.github.com> Date: Mon, 4 Nov 2024 19:57:09 +0530 Subject: [PATCH] added levenshtein-distance algo --- .../Levenshtein Distance/README.md | 71 +++++++++++++++++++ .../Levenshtein Distance/program.c | 65 +++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 Dynamic Programming/Levenshtein Distance/README.md create mode 100644 Dynamic Programming/Levenshtein Distance/program.c diff --git a/Dynamic Programming/Levenshtein Distance/README.md b/Dynamic Programming/Levenshtein Distance/README.md new file mode 100644 index 00000000..955a6911 --- /dev/null +++ b/Dynamic Programming/Levenshtein Distance/README.md @@ -0,0 +1,71 @@ +# Levenshtein Distance in C + +## Problem Statement +The Levenshtein distance between two strings is the minimum number of single-character edits (insertions, deletions, or substitutions) required to transform one string into the other. This algorithm is commonly used in spell checkers, DNA sequencing, and natural language processing. + +### Examples + +#### Example 1 +**Input:** +s1 = "kitten" s2 = "sitting" +**Output:** +3 + +**Explanation:** +The minimum edits to transform "kitten" to "sitting" are: +1. Substitute "k" with "s" +2. Substitute "e" with "i" +3. Append "g" + +#### Example 2 +**Input:** +s1 = "flaw" s2 = "lawn" +**Output:** +2 + +**Explanation:** +The minimum edits to transform "flaw" to "lawn" are: +1. Delete the leading "f" +2. Append "n" + +--- + +## Approach +This solution uses Dynamic Programming (DP) to calculate the Levenshtein distance between two strings efficiently. + +### DP Recurrence Relation +Define `dp[i][j]` as the Levenshtein distance between the first `i` characters of `s1` and the first `j` characters of `s2`. + +1. **If characters match** (`s1[i-1] == s2[j-1]`): + - `dp[i][j] = dp[i-1][j-1]` +2. 
/*
 * Levenshtein distance (program.c)
 *
 * The Levenshtein distance between two strings is the minimum number of
 * single-character insertions, deletions, or substitutions needed to turn
 * one string into the other.
 *
 * DP recurrence, with dp[i][j] the distance between the first i characters
 * of s1 and the first j characters of s2:
 *
 *   dp[i][0] = i and dp[0][j] = j                       (base cases)
 *   if s1[i-1] == s2[j-1]:
 *       dp[i][j] = dp[i-1][j-1]
 *   otherwise:
 *       dp[i][j] = 1 + min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1])
 *   where
 *       dp[i-1][j]   + 1 represents a deletion,
 *       dp[i][j-1]   + 1 represents an insertion,
 *       dp[i-1][j-1] + 1 represents a substitution.
 *
 * Complexity (n = strlen(s1), m = strlen(s2)):
 *   Time:  O(n * m)
 *   Space: O(min(n, m)) -- only the previous and the current row of the
 *          table are kept, because the recurrence never looks further back.
 *
 * Build and run (any C compiler works; GCC shown):
 *   gcc program.c -o program
 *   ./program
 *
 * Define LEVENSHTEIN_NO_MAIN at compile time to build the functions without
 * the demo entry point, e.g. when linking them into a test harness.
 */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Returns the smallest of the three values a, b and c. */
int min(int a, int b, int c) {
    int smallest = a;
    if (b < smallest) {
        smallest = b;
    }
    if (c < smallest) {
        smallest = c;
    }
    return smallest;
}

/*
 * Computes the Levenshtein distance between the NUL-terminated strings s1
 * and s2, comparing them byte by byte.
 *
 * Returns the distance (>= 0) on success, or -1 if either argument is NULL,
 * if a string is too long for the result to be representable as an int, or
 * if memory for the working rows cannot be allocated.
 *
 * The function owns and frees all memory it allocates; the inputs are not
 * modified.
 */
int levenshteinDistance(const char *s1, const char *s2) {
    if (s1 == NULL || s2 == NULL) {
        return -1;
    }

    size_t len1 = strlen(s1);
    size_t len2 = strlen(s2);

    /* The distance is symmetric, so put the shorter string on the column
     * axis: the two working rows then need only min(len1, len2) + 1 cells. */
    if (len2 > len1) {
        const char *tmp_str = s1;
        s1 = s2;
        s2 = tmp_str;

        size_t tmp_len = len1;
        len1 = len2;
        len2 = tmp_len;
    }

    /* Every cell value is bounded by the longer length, so this single check
     * guarantees that no cell (and no "+ 1" below) overflows an int. */
    if (len1 >= (size_t)INT_MAX) {
        return -1;
    }

    size_t cols = len2 + 1;
    int *prev = calloc(cols, sizeof *prev);
    int *curr = calloc(cols, sizeof *curr);
    if (prev == NULL || curr == NULL) {
        free(prev);
        free(curr);
        return -1;
    }

    /* Row 0: turning the empty prefix of s1 into s2[0..j) takes j inserts. */
    for (size_t j = 0; j < cols; j++) {
        prev[j] = (int)j;
    }

    for (size_t i = 1; i <= len1; i++) {
        /* Column 0: turning s1[0..i) into the empty string takes i deletes. */
        curr[0] = (int)i;

        for (size_t j = 1; j <= len2; j++) {
            if (s1[i - 1] == s2[j - 1]) {
                curr[j] = prev[j - 1];      /* Characters match, no cost */
            } else {
                curr[j] = min(
                    prev[j] + 1,            /* Deletion */
                    curr[j - 1] + 1,        /* Insertion */
                    prev[j - 1] + 1         /* Substitution */
                );
            }
        }

        /* The row just filled becomes the "previous" row of the next pass. */
        int *swap = prev;
        prev = curr;
        curr = swap;
    }

    /* After the final swap the last computed row lives in prev. */
    int distance = prev[len2];

    free(prev);
    free(curr);

    return distance;
}

#ifndef LEVENSHTEIN_NO_MAIN
/* Demo entry point: prints the distance between two sample strings. */
int main(void) {
    const char *s1 = "kitten";
    const char *s2 = "sitting";

    printf("Levenshtein distance between '%s' and '%s' is %d\n", s1, s2, levenshteinDistance(s1, s2));

    return 0;
}
#endif /* LEVENSHTEIN_NO_MAIN */