forked from weizhongli/cdhit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cd-hit-clstr_2_blm8.pl
executable file
·61 lines (57 loc) · 2.25 KB
/
cd-hit-clstr_2_blm8.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env perl
#
my $rep;
my @non_reps = ();
my @aln_info = ();
while($ll=<>){
if ($ll =~ /^>/ ) {
if (@non_reps) {
for ($i=0; $i<@non_reps; $i++) {
print "$non_reps[$i]\t$rep\t$aln_info[$i]\n";
}
}
$rep = "";
@non_reps = ();
@aln_info = ();
}
else {
chop($ll);
if ($ll =~ />(.+)\.\.\./ ) {
my $id = $1;
if ($ll =~ /\*$/) { $rep = $id}
else {
push(@non_reps, $id);
my @lls = split(/\s+/, $ll);
# my ($a, $iden) = split(/\//, $lls[-1]); #### bug, with cd-hit-est-2d, there are +/- sign e.g. 10:1029:30:1042/+/97.35%
my @mms = split(/\//, $lls[-1]);
my $a = $mms[0];
my $iden = $mms[-1];
chop($iden); ### removing % sign
my ($qb, $qe, $sb, $se) = split(/:/, $a);
my $alnln = $qe-$qb+1;
my $mis = int($alnln * (100-$iden) / 100);
my $gap = 0;
my $exp = 0;
my $bit = $alnln*3 - $mis*6; #### very rough
push(@aln_info, "$iden\t$alnln\t$mis\t$gap\t$qb\t$qe\t$sb\t$se\t$exp\t$bit");
}
}
#>Cluster 582
#0 6671aa, >gi|302514050|uid|51... *
#1 339aa, >SRS584717|scaffold|... at 2:332:4020:4356/89.32%
#2 182aa, >SRS584717|scaffold|... at 1:182:6490:6671/100.00%
#3 367aa, >SRS584717|scaffold|... at 1:332:4543:4874/90.66%
#4 109aa, >SRS584717|scaffold|... at 1:108:5782:5889/97.22%
}
}
if (@non_reps) {
for ($i=0; $i<@non_reps; $i++) {
print "$non_reps[$i]\t$rep\t$aln_info[$i]\n";
}
}
#query subject % alnln mis gap q_b q_e s_b s_e expect bits
##0 1 2 3 4 5 6 7 8 9 10 11
#mHE-SRS012902|scaffold|86.16 gnl|CDD|226997 47.62 42 17 2 164 201 210 250 5e-04 37.6
#mHE-SRS012902|scaffold|109.23 gnl|CDD|225183 47.46 236 122 1 1 236 475 708 1e-92 284
#mHE-SRS012902|scaffold|109.23 gnl|CDD|224055 44.35 239 130 2 1 239 332 567 2e-84 259
#mHE-SRS012902|scaffold|109.23 gnl|CDD|227321 39.50 238 140 3 1 238 324 557 9e-69 218