% CSIv1.tex: CSI version 1 index format (field layout table and binning functions).
\documentclass[10pt]{article}
\usepackage{color}
\definecolor{gray}{rgb}{0.7,0.7,0.7}
\usepackage{framed}
\usepackage{enumitem}
\usepackage{longtable}
\addtolength{\textwidth}{3.4cm}
\addtolength{\hoffset}{-1.7cm}
\addtolength{\textheight}{4cm}
\addtolength{\voffset}{-2cm}
\begin{document}
% Field-layout table for the CSI index file.
% The [h] specifier asks LaTeX to place the float at this point in the text.
\begin{table}[h]
{\small
% Seven columns: columns 1-4 together hold the field name (a field that sits
% inside a list starts one column further right per nesting level), followed
% by the Description, Type and Value columns.
\begin{tabular}{|l|l|l|l|l|l|r|}
\cline{1-7}
\multicolumn{4}{|c|}{\bf Field} & \multicolumn{1}{c|}{\bf Description} & \multicolumn{1}{c|}{\bf Type} & \multicolumn{1}{c|}{\bf Value} \\\cline{1-7}
% Top-level header fields: the name spans all four name columns.
% \char92 selects character code 92 (a backslash in the typewriter font).
\multicolumn{4}{|l|}{\sf magic} & Magic string & {\tt char[4]} & {\tt CSI\char92 1}\\\cline{1-7}
% NOTE(review): the bracketed values for min_shift and depth match the
% BAI-style parameters quoted in the text after the table; presumably the
% brackets mark typical values rather than required ones -- confirm.
\multicolumn{4}{|l|}{\sf min\_shift} & \# bits for the minimal interval & {\tt int32\_t} & [14]\\\cline{1-7}
\multicolumn{4}{|l|}{\sf depth} & Depth of the binning index & {\tt int32\_t} & [5]\\\cline{1-7}
\multicolumn{4}{|l|}{\sf l\_aux} & Length of auxiliary data & {\tt int32\_t} & [0]\\\cline{1-7}
\multicolumn{4}{|l|}{\sf aux} & Auxiliary data & {\tt uint8\_t[l\_aux]} & \\\cline{1-7}
\multicolumn{4}{|l|}{\sf n\_ref} & \# reference sequences & {\tt int32\_t} & \\\cline{1-7}
% Each gray italic row opens a repeated group. The \cline after it starts one
% column further right, so the left-hand column(s) carry no horizontal rule
% for the rows that belong to the group.
% Nesting level 1: one index per reference sequence.
\multicolumn{7}{|c|}{\textcolor{gray}{\it List of indices (n=n\_ref)}} \\\cline{2-7}
& \multicolumn{3}{l|}{\sf n\_bin} & \# distinct bins & {\tt int32\_t} & \\\cline{2-7}
% Nesting level 2: one entry per distinct bin.
& \multicolumn{6}{c|}{\textcolor{gray}{\it List of distinct bins (n=n\_bin)}} \\\cline{3-7}
& & \multicolumn{2}{l|}{\sf bin} & Distinct bin & {\tt uint32\_t} & \\\cline{3-7}
& & \multicolumn{2}{l|}{\sf loffset} & (Virtual) file offset of the first overlapping record & {\tt uint64\_t} & \\\cline{3-7}
& & \multicolumn{2}{l|}{\sf n\_chunk} & \# chunks & {\tt int32\_t} & \\\cline{3-7}
% Nesting level 3: one begin/end offset pair per chunk.
& & \multicolumn{5}{c|}{\textcolor{gray}{\it List of chunks (n=n\_chunk)}} \\\cline{4-7}
& & & {\sf chunk\_beg} & (Virtual) file offset of the start of the chunk & {\tt uint64\_t} & \\\cline{4-7}
% The full-width \cline{1-7} below closes all three nested lists at once.
& & & {\sf chunk\_end} & (Virtual) file offset of the end of the chunk & {\tt uint64\_t} & \\\cline{1-7}
% Trailing top-level field, marked optional in the Field column.
\multicolumn{4}{|l|}{{\sf n\_no\_coor} (optional)} & \# unmapped unplaced reads ({\sf RNAME} *) & {\tt uint64\_t} & \\\cline{1-7}
\end{tabular}}
\end{table}
\noindent
The following functions generalise those given in the SAM specification for a BAI-style binning scheme.
Note that in CSI {\sf depth} refers to the scheme's maximal depth, i.e., the level number of the scheme's smallest bins, where the single bin spanning the entire coordinate range is level~0.
Hence the BAI-style binning scheme, with six levels in total, is represented by $\mbox{\sf min\_shift} = 14, \mbox{\sf depth} = 5$.
CSI index files may contain metadata pseudo-bins for each reference sequence, with the same contents as BAI pseudo-bins.
In CSI, the pseudo-bins have bin number $\mbox{\sf bin\_limit}(\mbox{\sf min\_shift}, \mbox{\sf depth}) + 1$.
{\footnotesize
\begin{verbatim}
/* Return the smallest bin that fully contains the alignment [beg,end)
 * (zero-based, half-closed-half-open); bin 0 is returned if no finer bin fits. */
int reg2bin(int64_t beg, int64_t end, int min_shift, int depth)
{
    int level = depth;                          /* start at the finest level */
    int shift = min_shift;                      /* log2 of the bin width at this level */
    int first = ((1 << (depth * 3)) - 1) / 7;   /* number of the first bin at this level */
    int64_t last = end - 1;                     /* inclusive end coordinate */

    while (level > 0) {
        /* beg and last fall into the same bin at this level */
        if ((beg >> shift) == (last >> shift))
            return first + (beg >> shift);
        /* otherwise move one level up: bins become 8 times wider */
        level -= 1;
        shift += 3;
        first -= 1 << (level * 3);
    }
    return 0;
}
/* Store in bins[] every bin that may overlap the region [beg,end) (zero-based)
 * and return how many bins were stored. */
int reg2bins(int64_t beg, int64_t end, int min_shift, int depth, int *bins)
{
    int count = 0;                          /* entries written to bins[] so far */
    int first = 0;                          /* number of the first bin at this level */
    int shift = min_shift + depth * 3;      /* log2 of the bin width at this level */
    int64_t last = end - 1;                 /* inclusive end coordinate */
    int level;

    /* walk from level 0 (the single widest bin) down to the finest level */
    for (level = 0; level <= depth; ++level) {
        int lo = first + (beg >> shift);
        int hi = first + (last >> shift);
        int bin;

        for (bin = lo; bin <= hi; ++bin)
            bins[count++] = bin;

        /* level `level` holds 8^level bins; the next level is 8 times finer */
        first += 1 << (level * 3);
        shift -= 3;
    }
    return count;
}
/* Return the exclusive upper bound on bin numbers: valid bins lie in
 * [0,bin_limit).  The result depends on depth only, not on min_shift. */
int bin_limit(int min_shift, int depth)
{
    int bits = (depth + 1) * 3;     /* three bits for each of the depth+1 levels */

    return ((1 << bits) - 1) / 7;
}
\end{verbatim}
}
\end{document}