forked from weizhongli/cdhit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot_len1.pl
executable file
·105 lines (98 loc) · 2.14 KB
/
plot_len1.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env perl
#
# plot_len1.pl - tabulate a cluster file by cluster size and by the length
# of each cluster's representative sequence.
#
# Usage:
#   plot_len1.pl <clstr_file> [<size_segs> [<len_segs>]]
#
#   <clstr_file>  Cluster listing.  A line starting with ">" opens a new
#                 cluster; every other non-blank line is one member.  The
#                 member line ending in "*" is the representative, and its
#                 length is taken from the "<digits>aa, " or "<digits>nt, "
#                 field on that line.
#   <size_segs>   Comma-separated cluster-size bins.  Each bin is "N",
#                 "N-M" (inclusive) or "N-up", where "up" stands for the
#                 largest cluster size found.  Example: 1,2-4,5-9,10-up
#   <len_segs>    Comma-separated representative-length bins in the same
#                 syntax; "up" stands for the longest representative found.
#
# Output (tab-separated, on STDOUT): a header line, one row per size bin
# giving the number of sequences, the number of clusters and the number of
# clusters whose representative falls in each length bin, then a "Total"
# row summing the printed rows.

use strict;
use warnings;

my ($clstr_file, $size_spec, $len_spec) = @ARGV;
die "Usage: $0 <clstr_file> [<size_segs> [<len_segs>]]\n"
    unless defined $clstr_file;

# The bin lists have always been optional: a missing list simply yields no
# rows / no length columns.
$size_spec = '' unless defined $size_spec;
$len_spec  = '' unless defined $len_spec;
my @size_segs = split /,/, $size_spec;
my @len_segs  = split /,/, $len_spec;

# $count_of_size[$n]    - number of clusters with exactly $n members
# $rep_lens_of_size[$n] - array ref: representative length of each of them
my @count_of_size;
my @rep_lens_of_size;
my $max_size = 0;    # largest cluster size seen
my $max_len  = 0;    # longest representative seen

# Parse one bin ("N", "N-M" or "N-up") into numeric (low, high) bounds.
# $open_end is the value substituted for "up".  Dies on malformed input
# rather than silently treating it as 0.
sub seg_bounds {
    my ($seg, $open_end) = @_;
    $seg =~ /\A\s*(\d+)\s*(?:-\s*(\d+|up)\s*)?\z/i
        or die "Bad segment '$seg': expected N, N-M or N-up\n";
    my $low  = $1;
    my $high = defined $2 ? $2 : $1;
    $high = $open_end if lc($high) eq 'up';
    # "+ 0" forces numeric values so that ".." below never falls back to
    # string-increment semantics for inputs such as "01".
    return ($low + 0, $high + 0);
}

# Record a finished cluster of $size members whose representative is
# $rep_len long.  Clusters without members are ignored.
sub record_cluster {
    my ($size, $rep_len) = @_;
    return if $size < 1;
    $count_of_size[$size]++;
    push @{ $rep_lens_of_size[$size] }, $rep_len;
    $max_size = $size    if $size > $max_size;
    $max_len  = $rep_len if $rep_len > $max_len;
    return;
}

# Fail fast on malformed bins, before the input file is read.
seg_bounds($_, 0) for @size_segs, @len_segs;

open(my $in, '<', $clstr_file)
    or die "Can not open file '$clstr_file': $!\n";

my $members = 0;    # members seen so far in the current cluster
my $rep_len = 0;    # representative length of the current cluster
while (my $line = <$in>) {
    if ($line =~ /^>/) {
        # A header closes the previous cluster (if any) and opens a new one.
        record_cluster($members, $rep_len);
        $members = 0;
        $rep_len = 0;    # do not let the previous cluster's length leak in
        next;
    }

    # Strip the line ending without eating a final "*" when the last line
    # has no newline; this also copes with CRLF input.
    $line =~ s/\s+\z//;
    next if $line eq '';

    $members++;
    if ($line =~ /\*\z/ and $line =~ /(\d+)(?:aa|nt), /) {
        $rep_len = $1;
    }
}
close($in);
record_cluster($members, $rep_len);    # the last cluster has no closing header

print join("\t", 'Size', 'No. seq', 'No. clstr', @len_segs), "\n";

my @len_bounds       = map { [ seg_bounds($_, $max_len) ] } @len_segs;
my @total_in_len_seg = (0) x @len_segs;
my $total_seqs       = 0;
my $total_clusters   = 0;

for my $seg (@size_segs) {
    my ($low, $high) = seg_bounds($seg, $max_size);
    # Sizes above the largest observed one cannot contribute; clamping keeps
    # bins such as "500-99999" cheap.
    $high = $max_size if $high > $max_size;

    my $seqs     = 0;
    my $clusters = 0;
    my @rep_lens;
    for my $size ($low .. $high) {
        my $n = $count_of_size[$size];
        next unless $n;
        $seqs     += $size * $n;
        $clusters += $n;
        push @rep_lens, @{ $rep_lens_of_size[$size] };
    }
    $total_seqs     += $seqs;
    $total_clusters += $clusters;

    my @row = ($seg, $seqs, $clusters);
    for my $k (0 .. $#len_bounds) {
        my ($min_len, $top_len) = @{ $len_bounds[$k] };
        my $hits = grep { $_ >= $min_len and $_ <= $top_len } @rep_lens;
        push @row, $hits;
        $total_in_len_seg[$k] += $hits;
    }
    print join("\t", @row), "\n";
}

print join("\t", 'Total', $total_seqs, $total_clusters, @total_in_len_seg),
    "\n";