@@ -57,7 +57,8 @@ def load_gtf(
57
57
gtf = gtf [gtf ["Feature" ] == "gene" ]
58
58
59
59
# Find common genes between GTF and marker scores
60
- common_gene = np .intersect1d (mk_score .index , gtf .gene_name )
60
+ # common_gene = np.intersect1d(mk_score.index, gtf.gene_name)
61
+ common_gene = list (set (mk_score .index ) & set (gtf .gene_name ))
61
62
logger .info (f"Found { len (common_gene )} common genes between GTF and marker scores" )
62
63
63
64
# Filter GTF and marker scores to common genes
@@ -69,6 +70,9 @@ def load_gtf(
69
70
70
71
# Process the GTF (open window around gene coordinates)
71
72
gtf_bed = gtf [["Chromosome" , "Start" , "End" , "gene_name" , "Strand" ]].copy ()
73
+ gtf_bed ["Chromosome" ] = gtf_bed ["Chromosome" ].apply (
74
+ lambda x : f"chr{ x } " if not str (x ).startswith ("chr" ) else x
75
+ )
72
76
gtf_bed .loc [:, "TSS" ] = gtf_bed ["Start" ]
73
77
gtf_bed .loc [:, "TED" ] = gtf_bed ["End" ]
74
78
@@ -128,7 +132,7 @@ def load_bim(bfile_root: str, chrom: int) -> tuple[pd.DataFrame, pr.PyRanges]:
128
132
- bim_pr is a PyRanges object with BIM data
129
133
"""
130
134
bim_file = f"{ bfile_root } .{ chrom } .bim"
131
- logger .debug (f"Loading BIM file: { bim_file } " )
135
+ logger .info (f"Loading BIM file: { bim_file } " )
132
136
133
137
bim = pd .read_csv (bim_file , sep = "\t " , header = None )
134
138
bim .columns = ["CHR" , "SNP" , "CM" , "BP" , "A1" , "A2" ]
@@ -311,6 +315,8 @@ def get_ldscore(
311
315
bfile_chr_prefix = f"{ bfile_root } .{ chrom } " , keep_snps = keep_snps_index
312
316
)
313
317
318
+ annot_matrix = annot_matrix [geno_array .kept_snps , :]
319
+
314
320
# Configure LD window based on specified unit
315
321
if ld_unit == "SNP" :
316
322
max_dist = ld_wind
0 commit comments