Skip to content

Commit

Permalink
Merge pull request #39 from cBioPortal/inc-seg
Browse files Browse the repository at this point in the history
(7/7) RFC79: Implement incremental upload of CNA segmented data
  • Loading branch information
forus authored Jun 19, 2024
2 parents 8c74dbb + d7e8ff3 commit d15c579
Show file tree
Hide file tree
Showing 14 changed files with 259 additions and 22 deletions.
1 change: 1 addition & 0 deletions scripts/importer/cbioportal_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ class MetaFileTypes(object):
MetaFileTypes.TIMELINE,
MetaFileTypes.GENE_PANEL_MATRIX,
MetaFileTypes.STRUCTURAL_VARIANT,
MetaFileTypes.SEG,
]

IMPORTER_CLASSNAME_BY_META_TYPE = {
Expand Down
27 changes: 25 additions & 2 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoClinicalData.java
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,6 @@ public static List<ClinicalData> getSampleData(int cancerStudyId, Collection<Str
public static void removeSampleAttributesData(int sampleInternalId) throws DaoException {
Connection con = null;
PreparedStatement pstmt = null;
ResultSet rs = null;
try {
con = JdbcUtil.getDbConnection(DaoClinicalData.class);
pstmt = con.prepareStatement(SAMPLE_ATTRIBUTES_DELETE);
Expand All @@ -381,7 +380,31 @@ public static void removeSampleAttributesData(int sampleInternalId) throws DaoEx
throw new DaoException(e);
}
finally {
JdbcUtil.closeAll(DaoClinicalData.class, con, pstmt, rs);
JdbcUtil.closeAll(DaoClinicalData.class, con, pstmt, null);
}
}

/**
 * Deletes the values of a single clinical attribute for the given samples.
 *
 * <p>Issues one {@code DELETE} against {@code SAMPLE_ATTRIBUTES_TABLE}, binding the
 * attribute id and one {@code ?} placeholder per sample id in the {@code IN (...)} list.
 *
 * @param sampleInternalIds internal ids matched against the {@code INTERNAL_ID} column;
 *                          must not be {@code null}. An empty set is a no-op.
 * @param attrId            value matched against the {@code ATTR_ID} column
 * @throws DaoException if the statement fails with a {@link SQLException}
 */
public static void removeSampleAttributesData(Set<Integer> sampleInternalIds, String attrId) throws DaoException {
    // With no ids the generated clause would be "IN ()", which MySQL rejects as a
    // syntax error. There is nothing to delete in that case, so skip the query.
    if (sampleInternalIds.isEmpty()) {
        return;
    }
    Connection con = null;
    PreparedStatement pstmt = null;
    try {
        con = JdbcUtil.getDbConnection(DaoClinicalData.class);
        pstmt = con.prepareStatement("DELETE FROM " + SAMPLE_ATTRIBUTES_TABLE
                + " WHERE `ATTR_ID` = ? AND `INTERNAL_ID` IN ("
                + String.join(",", Collections.nCopies(sampleInternalIds.size(), "?"))
                + ")");
        // Parameter 1 is the attribute id; the sample ids follow in iteration order.
        int parameterIndex = 1;
        pstmt.setString(parameterIndex++, attrId);
        for (Integer sampleInternalId : sampleInternalIds) {
            pstmt.setInt(parameterIndex++, sampleInternalId);
        }
        pstmt.executeUpdate();
    }
    catch (SQLException e) {
        throw new DaoException(e);
    }
    finally {
        // No ResultSet is produced by a DELETE, hence the null third resource.
        JdbcUtil.closeAll(DaoClinicalData.class, con, pstmt, null);
    }
}

Expand Down
41 changes: 37 additions & 4 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoCopyNumberSegment.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public static int addCopyNumberSegment(CopyNumberSegment seg) throws DaoExceptio
}
}

public static void createFractionGenomeAlteredClinicalData(int cancerStudyId) throws DaoException {
public static void createFractionGenomeAlteredClinicalData(int cancerStudyId, Set<Integer> sampleIds, boolean updateMode) throws DaoException {
Connection con = null;
PreparedStatement pstmt = null;
ResultSet rs = null;
Expand All @@ -80,8 +80,15 @@ public static void createFractionGenomeAlteredClinicalData(int cancerStudyId) th
"AS c2 WHERE c2.`CANCER_STUDY_ID` = c1.`CANCER_STUDY_ID` AND c2.`SAMPLE_ID` = c1.`SAMPLE_ID` AND " +
"ABS(c2.`SEGMENT_MEAN`) >= 0.2) / SUM(`END`-`START`)) AS `VALUE` FROM `copy_number_seg` AS c1 , `cancer_study` " +
"WHERE c1.`CANCER_STUDY_ID` = cancer_study.`CANCER_STUDY_ID` AND cancer_study.`CANCER_STUDY_ID`=? " +
"GROUP BY cancer_study.`CANCER_STUDY_ID` , `SAMPLE_ID` HAVING SUM(`END`-`START`) > 0;");
pstmt.setInt(1, cancerStudyId);
(sampleIds == null ? "" : ("AND `SAMPLE_ID` IN ("+ String.join(",", Collections.nCopies(sampleIds.size(), "?")) + ") "))
+"GROUP BY cancer_study.`CANCER_STUDY_ID` , `SAMPLE_ID` HAVING SUM(`END`-`START`) > 0;");
int parameterIndex = 1;
pstmt.setInt(parameterIndex++, cancerStudyId);
if (sampleIds != null) {
for (Integer sampleId : sampleIds) {
pstmt.setInt(parameterIndex++, sampleId);
}
}
Map<Integer, String> fractionGenomeAltereds = new HashMap<Integer, String>();
rs = pstmt.executeQuery();
while (rs.next()) {
Expand All @@ -94,7 +101,10 @@ public static void createFractionGenomeAlteredClinicalData(int cancerStudyId) th
false, "20", cancerStudyId);
DaoClinicalAttributeMeta.addDatum(attr);
}


if (updateMode) {
DaoClinicalData.removeSampleAttributesData(fractionGenomeAltereds.keySet(), FRACTION_GENOME_ALTERED_ATTR_ID);
}
for (Map.Entry<Integer, String> fractionGenomeAltered : fractionGenomeAltereds.entrySet()) {
DaoClinicalData.addSampleDatum(fractionGenomeAltered.getKey(), FRACTION_GENOME_ALTERED_ATTR_ID, fractionGenomeAltered.getValue());
}
Expand Down Expand Up @@ -283,4 +293,27 @@ public static boolean segmentDataExistForSample(int cancerStudyId, int sampleId)
JdbcUtil.closeAll(DaoCopyNumberSegment.class, con, pstmt, rs);
}
}

/**
 * Deletes all copy number segments of the given samples within one cancer study.
 *
 * <p>Issues one {@code DELETE} against {@code copy_number_seg}, binding the study id
 * and one {@code ?} placeholder per sample id in the {@code IN (...)} list.
 *
 * @param cancerStudyId value matched against the {@code CANCER_STUDY_ID} column
 * @param sampleIds     ids matched against the {@code SAMPLE_ID} column; must not be
 *                      {@code null}. An empty set is a no-op.
 * @throws DaoException if the statement fails with a {@link SQLException}
 */
public static void deleteSegmentDataForSamples(int cancerStudyId, Set<Integer> sampleIds) throws DaoException {
    // With no ids the generated clause would be "IN ()", which MySQL rejects as a
    // syntax error. There is nothing to delete in that case, so skip the query.
    if (sampleIds.isEmpty()) {
        return;
    }
    Connection con = null;
    PreparedStatement pstmt = null;
    try {
        con = JdbcUtil.getDbConnection(DaoCopyNumberSegment.class);
        pstmt = con.prepareStatement("DELETE FROM `copy_number_seg`" +
                " WHERE `CANCER_STUDY_ID`= ?" +
                " AND `SAMPLE_ID` IN (" + String.join(",", Collections.nCopies(sampleIds.size(), "?"))
                + ")");
        // Parameter 1 is the study id; the sample ids follow in iteration order.
        int parameterIndex = 1;
        pstmt.setInt(parameterIndex++, cancerStudyId);
        for (Integer sampleId : sampleIds) {
            pstmt.setInt(parameterIndex++, sampleId);
        }
        pstmt.executeUpdate();
    } catch (SQLException e) {
        throw new DaoException(e);
    } finally {
        // No ResultSet is produced by a DELETE, hence the null third resource.
        JdbcUtil.closeAll(DaoCopyNumberSegment.class, con, pstmt, null);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public static int addCopyNumberSegmentFile(CopyNumberSegmentFile copySegFile) th
} catch (SQLException e) {
throw new DaoException(e);
} finally {
JdbcUtil.closeAll(DaoCopyNumberSegment.class, con, pstmt, rs);
JdbcUtil.closeAll(DaoCopyNumberSegmentFile.class, con, pstmt, rs);
}
}

Expand All @@ -86,6 +86,9 @@ public static CopyNumberSegmentFile getCopyNumberSegmentFile(int cancerStudyId)
cnsf.referenceGenomeId = CopyNumberSegmentFile.ReferenceGenomeId.valueOf(rs.getString("REFERENCE_GENOME_ID"));
cnsf.description = rs.getString("DESCRIPTION");
cnsf.filename = rs.getString("FILENAME");
if (rs.next()) {
throw new SQLException("More than one row was returned.");
}
return cnsf;
}
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@
import java.io.FileReader;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;

/**
* Import Segment data into database.
Expand All @@ -64,14 +66,17 @@
public class ImportCopyNumberSegmentData extends ConsoleRunnable {

private int entriesSkipped;

private boolean updateMode;
private Set<Integer> processedSampleIds;

private void importData(File file, int cancerStudyId) throws IOException, DaoException {
MySQLbulkLoader.bulkLoadOn();
FileReader reader = new FileReader(file);
BufferedReader buf = new BufferedReader(reader);
try {
String line = buf.readLine(); // skip header line
long segId = DaoCopyNumberSegment.getLargestId();
processedSampleIds = new HashSet<>();
while ((line=buf.readLine()) != null) {
ProgressMonitor.incrementCurValue();
ConsoleUtil.showProgress();
Expand All @@ -81,8 +86,7 @@ private void importData(File file, int cancerStudyId) throws IOException, DaoExc
System.err.println("wrong format: "+line);
}

CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByInternalId(cancerStudyId);
String chrom = strs[1].trim();
String chrom = strs[1].trim();
//validate in same way as GistitReader:
ValidationUtils.validateChromosome(chrom);

Expand Down Expand Up @@ -112,6 +116,10 @@ private void importData(File file, int cancerStudyId) throws IOException, DaoExc
CopyNumberSegment cns = new CopyNumberSegment(cancerStudyId, s.getInternalId(), chrom, start, end, numProbes, segMean);
cns.setSegId(++segId);
DaoCopyNumberSegment.addCopyNumberSegment(cns);
processedSampleIds.add(s.getInternalId());
}
if (updateMode) {
DaoCopyNumberSegment.deleteSegmentDataForSamples(cancerStudyId, processedSampleIds);
}
MySQLbulkLoader.flushAll();
}
Expand All @@ -127,6 +135,7 @@ public void run() {
OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, true);
String dataFile = (String) options.valueOf("data");
File descriptorFile = new File((String) options.valueOf("meta"));
updateMode = options.has("overwrite-existing");

Properties properties = new Properties();
properties.load(new FileInputStream(descriptorFile));
Expand All @@ -135,13 +144,13 @@ public void run() {

CancerStudy cancerStudy = getCancerStudy(properties);

if (segmentDataExistsForCancerStudy(cancerStudy)) {
if (!updateMode && segmentDataExistsForCancerStudy(cancerStudy)) {
throw new IllegalArgumentException("Seg data for cancer study " + cancerStudy.getCancerStudyStableId() + " has already been imported: " + dataFile);
}

importCopyNumberSegmentFileMetadata(cancerStudy, properties);
importCopyNumberSegmentFileData(cancerStudy, dataFile);
DaoCopyNumberSegment.createFractionGenomeAlteredClinicalData(cancerStudy.getInternalId());
DaoCopyNumberSegment.createFractionGenomeAlteredClinicalData(cancerStudy.getInternalId(), processedSampleIds, updateMode);
if( MySQLbulkLoader.isBulkLoad()) {
MySQLbulkLoader.flushAll();
}
Expand All @@ -164,7 +173,7 @@ private static boolean segmentDataExistsForCancerStudy(CancerStudy cancerStudy)
return (DaoCopyNumberSegment.segmentDataExistForCancerStudy(cancerStudy.getInternalId()));
}

private static void importCopyNumberSegmentFileMetadata(CancerStudy cancerStudy, Properties properties) throws DaoException {
private void importCopyNumberSegmentFileMetadata(CancerStudy cancerStudy, Properties properties) throws DaoException {
CopyNumberSegmentFile copyNumSegFile = new CopyNumberSegmentFile();
copyNumSegFile.cancerStudyId = cancerStudy.getInternalId();
String referenceGenomeId = properties.getProperty("reference_genome_id").trim();
Expand All @@ -179,7 +188,18 @@ private static void importCopyNumberSegmentFileMetadata(CancerStudy cancerStudy,
copyNumSegFile.referenceGenomeId = getRefGenId(referenceGenomeId);
copyNumSegFile.description = properties.getProperty("description").trim();
copyNumSegFile.filename = properties.getProperty("data_filename").trim();
DaoCopyNumberSegmentFile.addCopyNumberSegmentFile(copyNumSegFile);
CopyNumberSegmentFile storedCopyNumSegFile = DaoCopyNumberSegmentFile.getCopyNumberSegmentFile(cancerStudy.getInternalId());
if (updateMode && storedCopyNumSegFile != null) {
if (storedCopyNumSegFile.referenceGenomeId != copyNumSegFile.referenceGenomeId) {
throw new IllegalStateException("You are trying to upload "
+ copyNumSegFile.referenceGenomeId
+ " reference genome data into "
+ storedCopyNumSegFile.referenceGenomeId
+ " reference genome data.");
}
} else {
DaoCopyNumberSegmentFile.addCopyNumberSegmentFile(copyNumSegFile);
}
}

private void importCopyNumberSegmentFileData(CancerStudy cancerStudy, String dataFilename) throws IOException, DaoException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,13 @@ public void run() {
try {
String description = "Import 'timeline' data";

OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, false);
String dataFile = (String) options.valueOf("data");
OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, true);
if (options.has("loadMode") && !"bulkLoad".equals(options.valueOf("loadMode"))) {
throw new UnsupportedOperationException("This loader supports bulkLoad load mode only, but "
+ options.valueOf("loadMode")
+ " has been supplied.");
}
String dataFile = (String) options.valueOf("data");
File descriptorFile = new File((String) options.valueOf("meta"));
boolean overwriteExisting = options.has("overwrite-existing");

Expand Down
4 changes: 0 additions & 4 deletions src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,6 @@ public static OptionSet parseStandardDataAndMetaOptions(String[] args, String de
"Error: unknown loadMode action: " + actionArg);
}
}
else {
throw new UsageException(progName, description, parser,
"Error: 'loadMode' argument required.");
}
}
return options;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* This file is part of cBioPortal.
*
* cBioPortal is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package org.mskcc.cbio.portal.integrationTest.incremental;

import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mskcc.cbio.portal.dao.DaoCancerStudy;
import org.mskcc.cbio.portal.dao.DaoClinicalData;
import org.mskcc.cbio.portal.dao.DaoCopyNumberSegment;
import org.mskcc.cbio.portal.dao.DaoCopyNumberSegmentFile;
import org.mskcc.cbio.portal.dao.DaoException;
import org.mskcc.cbio.portal.dao.DaoSample;
import org.mskcc.cbio.portal.dao.MySQLbulkLoader;
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.ClinicalData;
import org.mskcc.cbio.portal.model.CopyNumberSegment;
import org.mskcc.cbio.portal.model.CopyNumberSegmentFile;
import org.mskcc.cbio.portal.model.Sample;
import org.mskcc.cbio.portal.scripts.ImportCopyNumberSegmentData;
import org.springframework.test.annotation.Rollback;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.transaction.annotation.Transactional;

import java.io.File;
import java.util.List;
import java.util.Set;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;

/**
* Tests Incremental Import of CNA segmented data.
*
* <p>Integration test: it runs with the Spring context from
* {@code applicationContext-dao.xml} and talks to the database through the static
* DAO classes. The class is annotated {@code @Transactional} and {@code @Rollback},
* i.e. Spring is asked to roll back the test transaction.
* NOTE(review): whether the static DAO JDBC connections take part in that
* transaction is not visible here - confirm.
*
* @author Ruslan Forostianov
* @author Pieter Lukasse
*/
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = { "classpath:/applicationContext-dao.xml" })
@Rollback
@Transactional
public class TestIncrementalCopyNumberSegmentDataImport {

/**
* Test incremental upload of CNA SEG data
*
* <p>Seeds one seg file record, one placeholder FRACTION_GENOME_ALTERED value and one
* segment for sample TCGA-A1-A0SE-01, runs {@link ImportCopyNumberSegmentData} with
* {@code --overwrite-existing}, then checks the seg file record, the number of
* segments and the FRACTION_GENOME_ALTERED value stored for that sample.
*
* @throws DaoException if any of the DAO calls made directly by the test fails
*/
@Test
public void testIncrementalUpload() throws DaoException {
String segSampleId = "TCGA-A1-A0SE-01";
Sample segDataSample = DaoSample.getSampleByCancerStudyAndSampleId(cancerStudy.getInternalId(), segSampleId);

// Pre-existing state, part 1: a seg file metadata record for the study.
CopyNumberSegmentFile copyNumberSegmentFile = new CopyNumberSegmentFile();
copyNumberSegmentFile.cancerStudyId = cancerStudy.getInternalId();
copyNumberSegmentFile.referenceGenomeId = CopyNumberSegmentFile.ReferenceGenomeId.hg19;
copyNumberSegmentFile.segFileId = 1;
copyNumberSegmentFile.filename = "test_file.seg";
copyNumberSegmentFile.description = "test seg file description";
DaoCopyNumberSegmentFile.addCopyNumberSegmentFile(copyNumberSegmentFile);
// Part 2: a recognisable placeholder value that the import is expected to replace.
DaoClinicalData.addSampleDatum(segDataSample.getInternalId(), "FRACTION_GENOME_ALTERED", "TEST");
// Part 3: a single segment for the sample, added with bulk loading switched on and
// followed by an explicit flushAll() (presumably rows are buffered until the flush - confirm).
MySQLbulkLoader.bulkLoadOn();
CopyNumberSegment copyNumberSegment = new CopyNumberSegment(
cancerStudy.getInternalId(),
segDataSample.getInternalId(),
"1",
3218610,
95674710,
100,
0.01);
copyNumberSegment.setSegId(1L);
DaoCopyNumberSegment.addCopyNumberSegment(copyNumberSegment);
MySQLbulkLoader.flushAll();

File dataFolder = new File("src/test/resources/incremental/copy_number_alteration/");
File metaFile = new File(dataFolder, "meta_cna_seg.txt");
File dataFile = new File(dataFolder, "data_cna.seg");

// Run the importer on the test resources with the --overwrite-existing flag set.
ImportCopyNumberSegmentData importCnaSegData = new ImportCopyNumberSegmentData(new String[] {
"--loadMode", "bulkLoad",
"--meta", metaFile.getAbsolutePath(),
"--data", dataFile.getAbsolutePath(),
"--overwrite-existing",
});
importCnaSegData.run();

// The seg file record returned for the study still carries the seeded filename.
CopyNumberSegmentFile fetchedCopyNumberSegmentFile = DaoCopyNumberSegmentFile.getCopyNumberSegmentFile(cancerStudy.getInternalId());
assertNotNull(fetchedCopyNumberSegmentFile);
assertEquals("test_file.seg", fetchedCopyNumberSegmentFile.filename);
// data_cna.seg has 9 rows for this sample; a count of 9 (not 10) means the seeded
// segment was replaced rather than kept alongside the imported ones.
List<CopyNumberSegment> cnaSegments = DaoCopyNumberSegment
.getSegmentForASample(segDataSample.getInternalId(), cancerStudy.getInternalId());
assertEquals(9, cnaSegments.size());
// The "TEST" placeholder must have been replaced by a value computed during import.
// findFirst().get() throws NoSuchElementException if the attribute is missing,
// which fails the test as well.
List<ClinicalData> clinicalData = DaoClinicalData.getSampleData(cancerStudy.getInternalId(), Set.of(segSampleId));
ClinicalData fractionGenomeAltered = clinicalData.stream()
.filter(cd -> "FRACTION_GENOME_ALTERED".equals(cd.getAttrId())).findFirst().get();
assertEquals("0.0000", fractionGenomeAltered.getAttrVal());
}

// Stable id of the study the test runs against.
// NOTE(review): presumably provided by the test database seed - confirm.
public static final String STUDY_ID = "study_tcga_pub";
// Study looked up by STUDY_ID in setUp() before each test.
private CancerStudy cancerStudy;

/**
* Loads the study identified by {@link #STUDY_ID} before each test.
*
* @throws DaoException if the study lookup fails
*/
@Before
public void setUp() throws DaoException {
cancerStudy = DaoCancerStudy.getCancerStudyByStableId(STUDY_ID);
}

}
10 changes: 10 additions & 0 deletions src/test/resources/incremental/copy_number_alteration/data_cna.seg
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
ID chrom loc.start loc.end num.mark seg.mean
TCGA-A1-A0SE-01 1 3218610 95674710 53225 0.0055
TCGA-A1-A0SE-01 1 95676511 95676518 2 -1.6636
TCGA-A1-A0SE-01 1 95680124 167057183 24886 0.0053
TCGA-A1-A0SE-01 1 167057495 167059336 3 -1.0999
TCGA-A1-A0SE-01 1 167059760 181602002 9213 -8e-04
TCGA-A1-A0SE-01 1 181603120 181609567 6 -1.2009
TCGA-A1-A0SE-01 1 181610685 201473647 12002 0.0055
TCGA-A1-A0SE-01 1 201474400 201474544 2 -1.4235
TCGA-A1-A0SE-01 1 201475220 247813706 29781 -4e-04
Loading

0 comments on commit d15c579

Please sign in to comment.