17
17
package org .opencb .cellbase .lib .builders .clinical .variant ;
18
18
19
19
import com .fasterxml .jackson .core .JsonProcessingException ;
20
+ import com .fasterxml .jackson .databind .ObjectMapper ;
21
+ import com .fasterxml .jackson .databind .ObjectReader ;
20
22
import org .apache .commons .lang3 .StringUtils ;
21
23
import org .opencb .biodata .formats .variant .clinvar .rcv .ClinvarParser ;
22
24
import org .opencb .biodata .formats .variant .clinvar .rcv .v64jaxb .*;
23
25
import org .opencb .biodata .models .variant .avro .*;
26
+ import org .opencb .cellbase .core .models .DataReleaseSource ;
24
27
import org .opencb .cellbase .lib .EtlCommons ;
25
28
import org .opencb .cellbase .lib .variant .VariantAnnotationUtils ;
26
29
import org .opencb .commons .ProgressLogger ;
41
44
import java .util .stream .Collectors ;
42
45
import java .util .stream .Stream ;
43
46
44
- import static org .opencb .cellbase .lib .EtlCommons .CLINVAR_DATE ;
45
- import static org .opencb .cellbase .lib .EtlCommons .CLINVAR_VERSION ;
47
+ import static org .opencb .cellbase .lib .EtlCommons .*;
46
48
47
49
//import org.opencb.biodata.formats.variant.clinvar.v24jaxb.*;
48
50
@@ -83,6 +85,10 @@ public class ClinVarIndexer extends ClinicalIndexer {
83
85
private final Path clinvarVariationAlleleFile ;
84
86
private final Path clinvarEFOFile ;
85
87
private final String assembly ;
88
+
89
+ private String version ;
90
+ private String date ;
91
+
86
92
private int numberSomaticRecords = 0 ;
87
93
private int numberGermlineRecords = 0 ;
88
94
private int numberNoDiseaseTrait = 0 ;
@@ -98,18 +104,32 @@ public ClinVarIndexer(Path clinvarXMLFiles, Path clinvarSummaryFile, Path clinva
98
104
Path clinvarEFOFile , boolean normalize , Path genomeSequenceFilePath , String assembly ,
99
105
RocksDB rdb ) throws IOException {
100
106
super (genomeSequenceFilePath );
101
- this . rdb = rdb ;
107
+
102
108
this .clinvarXMLFiles = clinvarXMLFiles ;
103
109
this .clinvarSummaryFile = clinvarSummaryFile ;
104
110
this .clinvarVariationAlleleFile = clinvarVariationAlleleFile ;
105
111
this .clinvarEFOFile = clinvarEFOFile ;
106
112
this .normalize = normalize ;
107
113
this .genomeSequenceFilePath = genomeSequenceFilePath ;
108
114
this .assembly = assembly ;
115
+
116
+ this .rdb = rdb ;
109
117
}
110
118
111
119
public void index () throws RocksDBException {
112
120
try {
121
+ Path clinvarVersionPath = clinvarSummaryFile .getParent ().resolve (CLINVAR_VERSION_FILENAME );
122
+ if (!Files .exists (clinvarVersionPath )) {
123
+ throw new IOException ("ClinVar version file " + clinvarVersionPath + " does not exist" );
124
+ }
125
+ ObjectMapper jsonObjectMapper = new ObjectMapper ();
126
+ ObjectReader jsonObjectReader = jsonObjectMapper .readerFor (DataReleaseSource .class );
127
+ DataReleaseSource dataReleaseSource = jsonObjectReader .readValue (clinvarVersionPath .toFile ());
128
+
129
+ this .date = dataReleaseSource .getDate ();
130
+ this .version = dataReleaseSource .getVersion ();
131
+
132
+
113
133
Map <String , EFO > traitsToEfoTermsMap = loadEFOTerms ();
114
134
Map <String , List <AlleleLocationData >> rcvToAlleleLocationData = parseVariantSummary (traitsToEfoTermsMap );
115
135
@@ -156,15 +176,9 @@ public boolean accept(File dir, String name) {
156
176
}
157
177
logger .info ("Done" );
158
178
printSummary ();
159
- } catch (RocksDBException e ) {
160
- logger .error ("Error reading/writing from/to the RocksDB index while indexing ClinVar" );
161
- throw e ;
162
- } catch (JAXBException e ) {
163
- logger .error ("Error unmarshalling clinvar Xml file: " + e .getMessage ());
164
- e .printStackTrace ();
165
- } catch (IOException e ) {
166
- logger .error ("Error indexing clinvar Xml file: " + e .getMessage ());
167
- e .printStackTrace ();
179
+ } catch (RocksDBException | JAXBException | IOException e ) {
180
+ logger .error ("Error indexing ClinVar" , e );
181
+ throw new RocksDBException (e .getMessage ());
168
182
}
169
183
}
170
184
@@ -331,7 +345,7 @@ private void addNewEntries(VariantAnnotation variantAnnotation, String variation
331
345
String mateVariantString , String clinicalHaplotypeString ,
332
346
Map <String , EFO > traitsToEfoTermsMap ) {
333
347
334
- EvidenceSource evidenceSource = new EvidenceSource (EtlCommons .CLINVAR_DATA , CLINVAR_VERSION , CLINVAR_DATE );
348
+ EvidenceSource evidenceSource = new EvidenceSource (EtlCommons .CLINVAR_DATA , version , date );
335
349
// Create a set to avoid situations like germline;germline;germline
336
350
List <AlleleOrigin > alleleOrigin = null ;
337
351
if (!EtlCommons .isMissing (lineFields [VARIANT_SUMMARY_ORIGIN_COLUMN ])) {
@@ -412,7 +426,7 @@ private void addNewEntries(VariantAnnotation variantAnnotation, PublicSetType pu
412
426
throws JsonProcessingException {
413
427
414
428
List <Property > additionalProperties = new ArrayList <>(3 );
415
- EvidenceSource evidenceSource = new EvidenceSource (EtlCommons .CLINVAR_DATA , CLINVAR_VERSION , CLINVAR_DATE );
429
+ EvidenceSource evidenceSource = new EvidenceSource (EtlCommons .CLINVAR_DATA , version , date );
416
430
// String accession = publicSet.getReferenceClinVarAssertion().getClinVarAccession().getAcc();
417
431
418
432
VariantClassification variantClassification = getVariantClassification (
0 commit comments