Skip to content

Commit

Permalink
MR-link-2 for R in working condition. Not as featureful as the python…
Browse files Browse the repository at this point in the history
… version, but will produce the same results
  • Loading branch information
adriaan-vd-graaf committed Oct 7, 2024
1 parent 73fb48d commit ae3eb0b
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 8 deletions.
81 changes: 80 additions & 1 deletion R/mr_link_2_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ mr_link2 <- function(selected_eigenvalues, selected_eigenvectors,

# Define optimization options
method <- "Nelder-Mead"
control <- list(maxit = 100000)
control <- list(maxit = 300)

# Calculate c_x and c_y
c_x <- t(selected_eigenvectors) %*% exposure_betas
Expand Down Expand Up @@ -207,4 +207,83 @@ mr_link2 <- function(selected_eigenvalues, selected_eigenvectors,
optim_ha_nit = ha_results$counts,
function_time = as.numeric(difftime(Sys.time(), start_time, units = "secs"))
)
}

remove_highly_correlated <- function(ld_matrix, snp_ordering, max_correlation) {
# Remove NAs from the correlation matrix
ld_matrix[is.na(ld_matrix)] <- 0

# Initialize a logical vector to mark rows/columns to remove
idxs_to_remove <- rep(FALSE, nrow(ld_matrix))

# Find pairs of SNPs that are highly correlated
correlated_indices <- which(abs(ld_matrix) >= max_correlation, arr.ind = TRUE)

to_remove <- vector("logical", length = nrow(ld_matrix))

# Loop through correlated pairs and mark SNPs to remove
for (i in seq_len(nrow(correlated_indices))) {
a <- correlated_indices[i, 1]
b <- correlated_indices[i, 2]

if ((!to_remove[a] && !to_remove[b] ) && (a != b)) {
to_remove[b] <- TRUE
}
}

# Remove highly correlated SNPs from both the matrix and the SNP list
ld_matrix <- ld_matrix[!to_remove, !to_remove]
pruned_snps <- snp_ordering[!to_remove]

return(list(ld_matrix = ld_matrix, pruned_snps = pruned_snps))
}

mr_link2_analysis <- function(exposure_betas, outcome_betas, ld_matrix, n_exp, n_out, max_correlation = 0.99) {
# Prune highly correlated SNPs
snp_ordering <- seq_len(nrow(ld_matrix)) # Simple numbering for SNPs
pruned_data <- remove_highly_correlated(ld_matrix, snp_ordering, max_correlation)
pruned_ld_matrix <- pruned_data$ld_matrix
pruned_snps <- pruned_data$pruned_snps

# Eigenvalue decomposition
eigen_decomp <- eigen(pruned_ld_matrix)
eigenvalues <- eigen_decomp$values
eigenvectors <- eigen_decomp$vectors

# Calculate cumulative variance explained
variance_explained <- cumsum(eigenvalues) / sum(eigenvalues)

# Select the eigenvectors that explain at least 99% of the variance
threshold_index <- min(which(variance_explained >= 0.99))
selected_eigenvectors <- eigenvectors[, 1:threshold_index]
selected_eigenvalues <- eigenvalues[1:threshold_index]

# Call the existing MR-link-2 function
# Assuming `mr_link2` is already defined in R and takes the following arguments:
# - selected_eigenvalues
# - selected_eigenvectors
# - exposure_betas
# - outcome_betas
# - n_exp (number of individuals in exposure dataset)
# - n_out (number of individuals in outcome dataset)
sigma_exp_guess <- 0.01 # You can adjust or estimate this as needed
sigma_out_guess <- 0.001 # You can adjust or estimate this as needed

# Subset the betas based on pruned SNPs
exposure_betas_pruned <- exposure_betas[pruned_snps]
outcome_betas_pruned <- outcome_betas[pruned_snps]

# Perform the MR-link-2 estimation
mr_result <- mr_link2(
selected_eigenvalues = selected_eigenvalues,
selected_eigenvectors = selected_eigenvectors,
exposure_betas = exposure_betas_pruned,
outcome_betas = outcome_betas_pruned,
n_exp = n_exp,
n_out = n_out,
sigma_exp_guess = sigma_exp_guess,
sigma_out_guess = sigma_out_guess
)

return(mr_result)
}
5 changes: 0 additions & 5 deletions tests/integration_tests_mr_link_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,6 @@
from mr_link_2_standalone import *


""""
NB. These tests require us to have plink in your path.
So unfortunately I will not test them in the github workflow.
"""

def test_plink_version():
result = subprocess.run(['plink', '--version'], capture_output=True, text=True)
assert result.returncode == 0, "PLINK is not installed or not in the PATH"
Expand Down
86 changes: 84 additions & 2 deletions tests/test_for_r_and_python_concordance.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
## for the R part

from rpy2.robjects import r, numpy2ri, default_converter
from rpy2.robjects.vectors import FloatVector
from rpy2.robjects.vectors import FloatVector, ListVector, StrVector

from rpy2.robjects.packages import importr
from rpy2.robjects.conversion import localconverter

Expand All @@ -22,7 +23,7 @@
# Load the base R package
base = importr('base')
# Source the R file with the functions
r['source']('../R/mr_link_2_functions.R')
r['source'](os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'R', 'mr_link_2_functions.R')))

"""
Now, we perform unit tests on the MR-link-2 functions. to ensure that there is no creep of the
Expand Down Expand Up @@ -423,3 +424,84 @@ def test_mr_link2_equivalence_fuzz(selected_eigenvalues, selected_eigenvectors,
for key in low_precision_keys:
assert np.isclose(python_result[key], r_result_dict[key], rtol=2.5e-2, ## this is pretty suboptimal, but as this also includes P values, there will not be a crazy amount of difference here.
atol=1e-8), f"Mismatch for {key}: Python={python_result[key]}, R={r_result_dict[key]}"


def test_remove_highly_correlated():
"""Test that the highly correlated SNPs are correctly removed."""

# Example LD matrix with correlation values
ld_matrix = np.array([[1.0, 0.9, 0.1],
[0.9, 1.0, 0.05],
[0.1, 0.05, 1.0]])

snp_ordering = ["SNP1", "SNP2", "SNP3"]
max_correlation = 0.8 # Set the maximum correlation threshold

with localconverter(numpy2ri.converter):
r_ld_matrix = r.matrix(ld_matrix, nrow=3)
# Wrap the snp_ordering list into a dictionary format for ListVector
r_snp_ordering = StrVector(snp_ordering)

# Call the R function `remove_highly_correlated`
r_pruned_result = r['remove_highly_correlated'](r_ld_matrix, r_snp_ordering, max_correlation)

# Extract results from R output
pruned_ld_matrix = np.array(r_pruned_result['ld_matrix'])
pruned_snps = list(r_pruned_result['pruned_snps'])

# Expected pruned results (SNP2 is removed)
expected_pruned_ld_matrix = np.array([[1.0, 0.05], [0.05, 1.0]])
expected_pruned_snps = ["SNP2", "SNP3"]

# Assertions
assert np.allclose(pruned_ld_matrix, expected_pruned_ld_matrix), "LD matrix was not pruned correctly."
assert pruned_snps == expected_pruned_snps, f"SNP pruning failed. Expected {expected_pruned_snps}, got {pruned_snps}"

# Test function for eigenvalue decomposition and MR-link-2 analysis
def test_mr_link2_analysis():
"""Test the MR-link-2 analysis on synthetic data."""

# Synthetic data
np.random.seed(42)
exposure_betas = np.random.normal(size=100)
outcome_betas = np.random.normal(size=100)
ld_matrix = np.random.normal(0, 1, (100, 100))
ld_matrix = (ld_matrix + ld_matrix.T) / 2 # Make it symmetric
np.fill_diagonal(ld_matrix, 1) # Ensure diagonals are 1s (perfect correlation with self)

n_exp = 1000
n_out = 1000
max_correlation = 0.9

# Convert inputs to R-compatible types
with localconverter(numpy2ri.converter):
r_exposure_betas = FloatVector(exposure_betas)
r_outcome_betas = FloatVector(outcome_betas)
r_ld_matrix = r.matrix(ld_matrix, nrow=100)
r_n_exp = n_exp
r_n_out = n_out
r_max_correlation = max_correlation

# Call the R function `mr_link2_analysis`
r_mr_result = r['mr_link2_analysis'](r_exposure_betas, r_outcome_betas, r_ld_matrix, r_n_exp, r_n_out,
r_max_correlation)

# Convert the result back to a Python-friendly format
mr_result_dict = dict(r_mr_result)

# Check the output has expected keys and ranges
assert 'alpha' in mr_result_dict, "Key 'alpha' missing in MR result."
assert 'p(alpha)' in mr_result_dict, "Key 'p(alpha)' missing in MR result."
assert 'sigma_x' in mr_result_dict, "Key 'sigma_x' missing in MR result."

# Example value checks (can be replaced with actual expected ranges based on real data)
assert np.isfinite(mr_result_dict['alpha']), "Alpha value is not finite."
assert 0 <= mr_result_dict['p(alpha)'] <= 1, "P-value for alpha is out of bounds."
assert mr_result_dict['sigma_x'] > 0, "Sigma_x should be positive."

print("MR-link-2 result passed all checks.")


# Run the tests
if __name__ == '__main__':
pytest.main()

0 comments on commit ae3eb0b

Please sign in to comment.