-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfastamotifsearch.pl
executable file
·107 lines (97 loc) · 2.66 KB
/
fastamotifsearch.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/perl -s
#*************************************************************************
#
# Program: fastamotifsearch
# File: fastamotifsearch.pl
#
# Version: V0.1
# Date: 13.03.13
# Function: search a FASTA file for a sequence pattern of the form
# SAXSSXA
#
# Copyright: (c) Dr. Andrew C. R. Martin, UCL, 2013
# Author: Dr. Andrew C. R. Martin
# Address: Institute of Structural and Molecular Biology
# Division of Biosciences
# University College
# Gower Street
# London
# WC1E 6BT
# EMail: [email protected]
#
#*************************************************************************
#
# This program is not in the public domain, but it may be copied
# according to the conditions laid out in the accompanying file
# COPYING.DOC
#
# The code may be modified as required, but any modifications must be
# documented so that the person responsible can be identified. If
# someone else breaks this code, I don't want to be blamed for code
# that does not work!
#
# The code may not be sold commercially or included as part of a
# commercial product except as described in the file COPYING.DOC.
#
#*************************************************************************
#
# Description:
# ============
# Very simple program to find simple regular expression matches in a
# FASTA sequence file
#
#*************************************************************************
#
# Usage:
# ======
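# fastamotifsearch pattern fastafile
#
# 'pattern' is a Perl regular expression in which every 'X' is treated
# as a wildcard matching any single character (e.g. SAXSSXA).
# The header line of each sequence that matches is printed to standard
# output.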
#
#*************************************************************************
#
# Revision History:
# =================
#
#*************************************************************************
use strict;
#*************************************************************************
# Main program
# ------------
# Takes a motif pattern and a FASTA file name from the command line and
# prints the header line of every record whose sequence matches the
# pattern.
#*************************************************************************
my $pattern = shift @ARGV;
my $file    = shift @ARGV;

# Both arguments are required; fail early with the usage message rather
# than with a confusing error from open() or the pattern match.
if(!defined($pattern) || !defined($file))
{
    die "Usage: fastamotifsearch pattern fastafile\n";
}

# GetFASTASequence() reads from the package filehandle FILE.
# Three-argument open so that characters such as '>' or '|' in the file
# name cannot be interpreted as an open mode.
open(FILE, '<', $file) || die "Can't read FASTA file: $file ($!)";

# GetFASTASequence() keeps the pending header line in $::labelline;
# initialise it before the first call rather than after the loop.
$::labelline = "";

# In the motif notation an 'X' is a wildcard: turn it into the regular
# expression '.' (any single character).
$pattern =~ s/X/\./g;

# Compile the pattern once instead of on every record. This also avoids
# Perl's special handling of an empty pattern string in a match.
my $regex = qr/$pattern/;

while(1)
{
    my ($label, $sequence) = GetFASTASequence();

    # An empty label signals that the file is exhausted
    last if(!defined($label) || ($label eq ""));

    # A record with no sequence lines is matched as the empty string
    $sequence = "" if(!defined($sequence));

    if($sequence =~ $regex)
    {
        print "$label\n";
    }
}

close(FILE);
#*************************************************************************
# ($label, $sequence) = GetFASTASequence()
# ----------------------------------------
# Reads the next record from the package filehandle FILE.
#
# Returns: $label    - the header line (including the leading '>') with
#                      the trailing newline removed, or "" when there
#                      are no more records
#          $sequence - all sequence lines of the record joined together
#                      with every whitespace character removed ("" if
#                      the record has no sequence lines)
#
# The header of the *next* record has to be read in order to know that
# the current record has ended. It is kept in the global $::labelline
# between calls, so the caller must not modify that variable while a
# file is being read.
#*************************************************************************
sub GetFASTASequence
{
    my $sequence = "";

    # Use a lexical line variable so the caller's $_ is not clobbered
    while(defined(my $line = <FILE>))
    {
        chomp $line;
        if($line =~ /^>/)
        {
            if(defined($::labelline) && ($::labelline ne ""))
            {
                # A header was already pending, so this new header
                # terminates the current record. Remember the new
                # header for the next call and hand back the record.
                my $label    = $::labelline;
                $::labelline = $line;
                return($label, $sequence);
            }

            # First header seen: start collecting its sequence
            $::labelline = $line;
        }
        else
        {
            # Strip whitespace from this line only, rather than
            # rescanning the whole accumulated sequence every time
            $line =~ s/\s//g;
            $sequence .= $line;
        }
    }

    # End of file: return whatever record is still pending. This also
    # covers a final header that has no sequence lines after it, which
    # must not be dropped. Clearing the pending header makes the next
    # call return ("", "") to signal the end of the data.
    my $label    = defined($::labelline) ? $::labelline : "";
    $::labelline = "";
    return($label, $sequence);
}