Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Francesca Giordano committed Apr 7, 2017
1 parent b8c12d0 commit 4b63e23
Show file tree
Hide file tree
Showing 10 changed files with 2,915 additions and 0 deletions.
489 changes: 489 additions & 0 deletions src/fast.c

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions src/fasta.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@

typedef struct
{
char *name;
char *name2;
char *path;
char *SCFname;
int length;
char *data;
char *qual;
int finished;
} fasta;

#define B64_long long int
//#define B64_long long long int
fasta *decodeFastq (char *fname, int *nContigs, B64_long *tB, char* pdata, B64_long Size_pdata,fasta *segg);
int extractFastq(char *fname, char *pdata, B64_long Size_pdata);
fasta *splitFastq ( fasta *iseg, int inSeg, int tB, int *nReads, int length, int step);
void fastaLC (fasta *seg, int nSeg);
void fastaUC (fasta *seg, int nSeg);

32 changes: 32 additions & 0 deletions src/makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Makefile for smis_scaffold
CC=c++

CFLAGS= -O2 -I$(MYBAMTOOLS)/include -Wl,-rpath,$(MYBAMTOOLS)/lib
LFLAGS= -lm -L$(MYBAMTOOLS)/lib -lbamtools -lz


SMISSORTT= smis_sort.o
SMISRENAME= fast.o smis_rename.o
SMISSHREDD= fast.o smis_shred.o
SMISSELCHR= smis_select_chr.o


all : smis_sort smis_rename smis_shred smis_select_chr


smis_sort: makefile $(SMISSORTT)
$(CC) $(CFLAGS) -o $@ $(SMISSORTT) $(LFLAGS)
chmod o-r smis_sort

smis_select_chr: makefile $(SMISSELCHR)
$(CC) $(CFLAGS) -o $@ $(SMISSELCHR) $(LFLAGS)
chmod o-r smis_select_chr

smis_rename: makefile $(SMISRENAME)
$(CC) $(CFLAGS) -o $@ $(SMISRENAME) $(LFLAGS)
chmod o-r smis_rename

smis_shred: makefile $(SMISSHREDD)
$(CC) $(CFLAGS) -o $@ $(SMISSHREDD) $(LFLAGS)
chmod o-r smis_shred

67 changes: 67 additions & 0 deletions src/plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import matplotlib.pyplot as plt
import sys
import numpy as np
from matplotlib.backends.backend_pdf import PdfPages
from time import time,ctime


plt.rc('xtick', labelsize=10)
plt.rc('ytick', labelsize=10)

font = {'family' : 'sans-serif',
'weight' : 'normal',
'size' : 15}

plt.rc('font', **font)

print ' Python plotting...'

pos=[]
ins=[]
nins=[]

file_name = sys.argv[1]
chromosome = sys.argv[2]
fp = open(file_name, 'r')
for line in fp:
col= line.split()
pos.append(float(col[1]))
ins.append(float(col[2]))
nins.append(float(col[3]))
fp.close()

x = np.array(pos)
y = np.array(ins)
ny= np.array(nins)

myfig = PdfPages(chromosome + '.pdf')

fig = plt.figure(figsize=(13,6.5))

# change hor scale to include all genome length?

fig.add_subplot(1,2,1)
plt.ylabel('Insert Size')
plt.xlabel('Position on Chromosome '+ chromosome)
#text(x, y, s, fontsize=12)
plt.plot(x,y,'ro')
plt.axhline(y=0)
plt.tight_layout()
#plt.show()

fig.add_subplot(1,2,2)
plt.ylim((-1,1))
plt.ylabel('Normalized Insert Size')
plt.xlabel('Position on Chromosome '+ chromosome)
#text(x, y, s, fontsize=12)
plt.plot(x,ny,'ro')
plt.axhline(y=0)
plt.tight_layout()

plt.show()


#myfig = PdfPages(chromosome + '.pdf')
myfig.savefig(fig)
myfig.close()

152 changes: 152 additions & 0 deletions src/smis_rename.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/****************************************************************************
****************************************************************************
* *
* Copyright (C) 2013, 2014 Genome Research Ltd. *
* *
* Author: Zemin Ning ([email protected]) *
* *
* This file is part of smis_pipeline. *
* *
* ssaha_pileup is free software: you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation, either version 3 of the License, or (at your *
* option) any later version. *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
* General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License along *
* with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
****************************************************************************
****************************************************************************/
/****************************************************************************/

#include <math.h>
#include <values.h>
#include <stdio.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <dirent.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <sys/signal.h>
#include <errno.h>
#include "smissv.h"



static B64_long sBase;

/* SSAS default parameters */

static int num_reads=0;
static int n_contigs = 0;

fasta *seq;


int main(int argc, char **argv)
{
FILE *namef;
int i,nSeq,args;
char line[2000]={0};
fasta *seq; // double declaration???

void Read_Pairs(char **argv,int args,int nLib,int nSeq);
void Rename_Process(char **argv,int args,int nRead);

seq=NULL;

if(argc < 2)
{
printf(" %s <input fasta/q> <output fasta >\n",argv[0]);
exit(1);
}

nSeq=0;
args=1;


num_reads = 0;
Rename_Process(argv,args,num_reads);
return EXIT_SUCCESS;

}
/* end of the main */

/* Subroutine to process alignment information */
/* ==================================================== */
void Rename_Process(char **argv,int args,int nRead)
/* ==================================================== */
{
int i,j,k,rc,nSeq = nRead;
char **DBname,*ptr,RC,nametag1[100],nametag2[100];
char *line,*st,*ed;
FILE *fp,*namef,*namef2,*fpOutfast,*fpOutfast2;
B64_long *read_offsets,big_num,sum_bases;
int n_patch,idd,stopflag,num_gcs,*contig_index;
float rate;
fasta *segg,*seqp;
B64_long Size_q_pdata;
int num_seqque;
char *pdata;

if((fp=fopen(argv[args],"rb"))==NULL) printf("Cannot open file\n");
fseek(fp, 0, SEEK_END);
Size_q_pdata = ftell(fp) + 1;
fclose(fp);
if((pdata=(char*)calloc(Size_q_pdata,sizeof(char)))==NULL)
printf("calloc pdata\n");
num_seqque = extractFastq(argv[args],pdata,Size_q_pdata);
if((segg=(fasta*)calloc((num_seqque),sizeof(fasta)))==NULL)
printf("calloc segg\n");
if((seq=decodeFastq(argv[args],&num_seqque,&sBase,pdata,Size_q_pdata,segg))==NULL)
printf("no query data found.\n");
n_contigs = num_seqque;
fastaUC(seq,n_contigs);

nSeq = n_contigs;
nRead = n_contigs;

if((namef = fopen(argv[args+1],"w")) == NULL)
{
printf("ERROR main:: reads group file \n");
exit(1);
}
/* output all the fastq files */
for(i=0;i<nSeq;i++)
{
char *dpp;
int gen_len,nline;

seqp=seq+i;
gen_len = seqp->length;
fprintf(namef,">contig%dsize%d\n",i+1,gen_len);
//fprintf(namef,">contig/%d/0_%d RQ=0.8\n",i+1,gen_len);

//fprintf(namef,">%d\n",i+1);

nline = gen_len/60;
for(k=0;k<nline;k++)
{
for(j=0;j<60;j++)
fprintf(namef,"%c",seqp->data[k*60+j]);
fprintf(namef,"\n");
}
for(j=0;j<(gen_len-(nline*60));j++)
fprintf(namef,"%c",seqp->data[nline*60+j]);
if((seqp->length%60)!=0)
fprintf(namef,"\n");
}
fclose(namef);
}



Loading

0 comments on commit 4b63e23

Please sign in to comment.