Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 152 additions & 0 deletions core/src/main/java/org/htsjdk/core/api/cigar/Cigar.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
package org.htsjdk.core.api.cigar;

import java.util.stream.StreamSupport;

/**
 * Represents a pairwise alignment using the CIGAR format.
 *
 * <p>The CIGAR (Compact Idiosyncratic Gapped Alignment Report) format is used in the
 * <a href="http://samtools.github.io/hts-specs/SAMv1.pdf">SAM specifications</a> to represent
 * alignments between a read and a reference genome, but it could also represent other pairwise
 * alignments. Here we use a nomenclature based on the alignment between a read and a reference
 * (e.g., {@link #getReferenceLength()} and {@link #getReadLength()}).
 *
 * <p>As a run-length encoded format, each element ({@link CigarElement}) of the CIGAR contains the
 * length of a concrete alignment operator ({@link CigarOperator}).
 *
 * <h2>Example</h2>
 *
 * <p>{@code 10M1D25M}:
 *
 * <ol>
 *   <li>Match or mismatch ({@link CigarOperator#M}) for 10 bases.</li>
 *   <li>Deletion ({@link CigarOperator#D}) of 1 base.</li>
 *   <li>Match or mismatch ({@link CigarOperator#M}) for 25 bases.</li>
 * </ol>
 *
 * @implSpec an empty {@link Cigar} represents an undefined alignment.
 */
public interface Cigar extends Iterable<CigarElement> {

    /**
     * Gets the number of elements on this CIGAR.
     *
     * @return number of elements on the cigar.
     */
    public int size();

    /**
     * Checks if this {@link Cigar} does not contain any element.
     *
     * @return {@code true} if the CIGAR is empty; {@code false} otherwise.
     * @implNote default implementation checks if {@link #size()} returns {@code 0}.
     * @implSpec return value should be equivalent to default implementation.
     */
    default boolean isEmpty() {
        return size() == 0;
    }

    /**
     * Gets the CIGAR element at position {@code i}.
     *
     * @param i index for the CIGAR element (0-based).
     * @return the element at position {@code i}.
     * @throws IndexOutOfBoundsException if the index does not fit into the cigar elements.
     */
    // TODO: should we throw an HtsjdkException
    public CigarElement getCigarElement(final int i);

    /**
     * Returns the CIGAR element at the first position.
     *
     * @return the element at position {@code 0}.
     * @throws IndexOutOfBoundsException if the CIGAR is empty (as specified by
     *     {@link #getCigarElement(int)}).
     * @implNote default implementation returns {@code getCigarElement(0)}.
     * @implSpec return value should be equivalent to default implementation.
     */
    default CigarElement getFirstCigarElement() {
        return getCigarElement(0);
    }

    /**
     * Returns the CIGAR element at the last position.
     *
     * @return the element at position {@code size() - 1}.
     * @throws IndexOutOfBoundsException if the CIGAR is empty (as specified by
     *     {@link #getCigarElement(int)}).
     * @implNote default implementation returns {@code getCigarElement(size() - 1)}.
     * @implSpec return value should be equivalent to default implementation.
     */
    default CigarElement getLastCigarElement() {
        return getCigarElement(size() - 1);
    }

    /**
     * Gets the length on the reference for this CIGAR (excluding padding).
     *
     * @return number of reference bases that the cigar covers; {@code 0} for an empty CIGAR.
     * @implNote default implementation sums the length of every element for which
     *     {@link CigarElement#consumesReferenceBases()} is {@code true}.
     */
    default int getReferenceLength() {
        return StreamSupport.stream(spliterator(), false)
                .filter(CigarElement::consumesReferenceBases)
                .mapToInt(CigarElement::getLength)
                .sum();
    }

    /**
     * Gets the length on the reference for this CIGAR (including padding).
     *
     * @return number of reference bases that the cigar covers with padding; {@code 0} for an
     *     empty CIGAR.
     * @implNote default implementation sums the length of every element that either consumes
     *     reference bases or whose operator is {@link CigarOperator#P}.
     */
    default int getPaddedReferenceLength() {
        return StreamSupport.stream(spliterator(), false)
                .filter(e -> e.consumesReferenceBases() || e.getOperator() == CigarOperator.P)
                .mapToInt(CigarElement::getLength)
                .sum();
    }

    /**
     * Gets the length on the read for this CIGAR.
     *
     * @return number of read bases that the cigar covers; {@code 0} for an empty CIGAR.
     * @implNote default implementation sums the length of every element for which
     *     {@link CigarElement#consumesReadBases()} is {@code true}.
     */
    default int getReadLength() {
        return StreamSupport.stream(spliterator(), false)
                .filter(CigarElement::consumesReadBases)
                .mapToInt(CigarElement::getLength)
                .sum();
    }

    /**
     * Checks if the CIGAR contains a concrete operator.
     *
     * @param operator operator to check for.
     * @return {@code true} if the operator is found in at least one element; {@code false}
     *     otherwise (including when the CIGAR is empty).
     */
    default boolean containsOperator(final CigarOperator operator) {
        return StreamSupport.stream(spliterator(), false)
                .anyMatch(element -> element.getOperator() == operator);
    }

    /**
     * Checks if the CIGAR is left-clipped.
     *
     * @return {@code true} if the first element represents a clip; {@code false} otherwise
     *     (including when the CIGAR is empty).
     */
    default boolean isLeftClipped() {
        // An empty CIGAR is a valid (undefined) alignment: it has no first element to inspect,
        // so report "not clipped" instead of letting getCigarElement(0) fail.
        return !isEmpty() && getFirstCigarElement().getOperator().isClipping();
    }

    /**
     * Checks if the CIGAR is right-clipped.
     *
     * @return {@code true} if the last element represents a clip; {@code false} otherwise
     *     (including when the CIGAR is empty).
     */
    default boolean isRightClipped() {
        // Same guard as isLeftClipped(): avoids getCigarElement(-1) on an empty CIGAR.
        return !isEmpty() && getLastCigarElement().getOperator().isClipping();
    }

    /**
     * Checks if the CIGAR is clipped in any extreme.
     *
     * @return {@code true} if at least one extreme represents a clip; {@code false} otherwise
     *     (including when the CIGAR is empty).
     */
    default boolean isClipped() {
        return isLeftClipped() || isRightClipped();
    }
}
85 changes: 85 additions & 0 deletions core/src/main/java/org/htsjdk/core/api/cigar/CigarElement.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package org.htsjdk.core.api.cigar;

/**
 * A single run-length encoded entry of a {@link Cigar}.
 *
 * <p>Every element pairs an alignment operator ({@link #getOperator()}) with the number of
 * consecutive positions it applies to ({@link #getLength()}).
 *
 * @see CigarOperator
 */
public interface CigarElement {

    /**
     * Returns how many positions this element spans.
     *
     * @return element length.
     */
    public int getLength();

    /**
     * Returns the alignment operator carried by this element.
     *
     * @return element operator.
     */
    public CigarOperator getOperator();

    /**
     * Tells whether this element's operator "consumes" bases from the read.
     *
     * @return {@code true} if the operator "consumes" read bases; {@code false} otherwise.
     *
     * @implNote default implementation returns {@code getOperator().consumesReadBases()}.
     * @implSpec return value should be equivalent to the default implementation.
     */
    default boolean consumesReadBases() {
        // convenience shortcut: the answer is a property of the operator
        final CigarOperator operator = getOperator();
        return operator.consumesReadBases();
    }

    /**
     * Tells whether this element's operator "consumes" bases from the reference.
     *
     * @return {@code true} if the operator "consumes" reference bases; {@code false} otherwise.
     *
     * @implNote default implementation returns {@code getOperator().consumesReferenceBases()}.
     * @implSpec return value should be equivalent to the default implementation.
     */
    default boolean consumesReferenceBases() {
        // convenience shortcut: the answer is a property of the operator
        final CigarOperator operator = getOperator();
        return operator.consumesReferenceBases();
    }

    /**
     * Tells whether this element's operator is a clip (hard or soft).
     *
     * @return {@code true} if the operator represents a clip; {@code false} otherwise.
     *
     * @implNote default implementation returns {@code getOperator().isClipping()}.
     * @see CigarOperator#isClipping()
     */
    default boolean isClipping() {
        final CigarOperator operator = getOperator();
        return operator.isClipping();
    }

    /**
     * Tells whether this element's operator is an indel (insertion or deletion).
     *
     * @return {@code true} if the operator represents an indel; {@code false} otherwise.
     *
     * @implNote default implementation returns {@code getOperator().isIndel()}.
     * @see CigarOperator#isIndel()
     */
    default boolean isIndel() {
        final CigarOperator operator = getOperator();
        return operator.isIndel();
    }

    /**
     * Tells whether this element's operator is an alignment (match or mismatch).
     *
     * @return {@code true} if the operator represents an alignment; {@code false} otherwise.
     *
     * @implNote default implementation returns {@code getOperator().isAlignment()}.
     * @see CigarOperator#isAlignment()
     */
    default boolean isAlignment() {
        final CigarOperator operator = getOperator();
        return operator.isAlignment();
    }
}
94 changes: 94 additions & 0 deletions core/src/main/java/org/htsjdk/core/api/cigar/CigarOperator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package org.htsjdk.core.api.cigar;

/**
 * Operations supported in the {@link Cigar} format.
 *
 * <p>Includes both the standard ({@link #M}, {@link #I}, {@link #D}) and the extended
 * ({@link #N}, {@link #S}, {@link #H}, {@link #P}, {@link #EQ}, {@link #X}) CIGAR elements.
 *
 * @see Cigar
 */
public enum CigarOperator {
    /** Match or mismatch. */
    M(true, true),
    /** Insertion vs. the reference. */
    I(true, false),
    /** Deletion vs. the reference. */
    D(false, true),
    /** Skipped region from the reference. */
    N(false, true),
    /** Soft clip. */
    S(true, false),
    /** Hard clip. */
    H(false, false),
    /** Padding. */
    P(false, false),
    /** Matches the reference. */
    EQ(true, true),
    /** Mismatches the reference. */
    X(true, true);

    /** Whether this operator advances along the read. */
    private final boolean readConsuming;
    /** Whether this operator advances along the reference. */
    private final boolean referenceConsuming;

    /**
     * Creates an operator with its base-consumption flags.
     *
     * @param readConsuming {@code true} if it consumes read bases.
     * @param referenceConsuming {@code true} if it consumes reference bases.
     */
    CigarOperator(final boolean readConsuming, final boolean referenceConsuming) {
        this.readConsuming = readConsuming;
        this.referenceConsuming = referenceConsuming;
    }

    /**
     * Tells whether the operator "consumes" bases from the read.
     *
     * @return {@code true} if the operator "consumes" read bases; {@code false} otherwise.
     */
    public boolean consumesReadBases() {
        return this.readConsuming;
    }

    /**
     * Tells whether the operator "consumes" bases from the reference.
     *
     * @return {@code true} if the operator "consumes" reference bases; {@code false} otherwise.
     */
    public boolean consumesReferenceBases() {
        return this.referenceConsuming;
    }

    /**
     * Tells whether the operator is a clip (hard or soft).
     *
     * <p>Operators representing clips are {@link #S} and {@link #H}.
     *
     * @return {@code true} if the operator represents a clip; {@code false} otherwise.
     */
    public boolean isClipping() {
        switch (this) {
            case S:
            case H:
                return true;
            default:
                return false;
        }
    }

    /**
     * Tells whether the operator is an indel (insertion or deletion).
     *
     * <p>Operators representing indels are {@link #I} and {@link #D}.
     *
     * @return {@code true} if the operator represents an indel; {@code false} otherwise.
     */
    public boolean isIndel() {
        switch (this) {
            case I:
            case D:
                return true;
            default:
                return false;
        }
    }

    /**
     * Tells whether the operator is an alignment (match or mismatch).
     *
     * <p>Operators representing alignments are {@link #M}, {@link #X} and {@link #EQ}.
     *
     * @return {@code true} if the operator represents an alignment; {@code false} otherwise.
     */
    public boolean isAlignment() {
        switch (this) {
            case M:
            case X:
            case EQ:
                return true;
            default:
                return false;
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/**
* API for the CIGAR pairwise alignment representation.
*/
package org.htsjdk.core.api.cigar;