Skip to content

Commit aea212a

Browse files
committed
use alternate surface forms first
1 parent 85f16b2 commit aea212a

File tree

1 file changed

+37
-2
lines changed

1 file changed

+37
-2
lines changed

src/normaliser.cpp

+37-2
Original file line numberDiff line numberDiff line change
@@ -96,19 +96,54 @@ void Normaliser::run(std::istream& is, std::ostream& os)
9696
os << result[0] << std::endl;
9797
}
9898
else if ((!result.empty()) && (result[4].length() != 0)) {
99+
string outstring = string(result[0]);
99100
if (tags.empty()) {
100-
os << result[0] << std::endl;
101+
os << outstring << std::endl;
101102
}
102103
bool expand = false;
103104
for (auto tag : tags) {
104-
if (string(result[0]).find(tag) != std::string::npos) {
105+
if (outstring.find(tag) != std::string::npos) {
105106
if (verbose) {
106107
std::cout << "Expanding because of " << tag <<
107108
std::endl;
108109
}
109110
expand = true;
110111
}
111112
}
113+
// try to find an existing ",,,"phon tag or an alt. surf. "<>"
114+
auto phonend = outstring.find("\"phon");
115+
auto phonstart = phonend;
116+
auto midtend = outstring.find("\"MIDTAPE");
117+
auto midtstart = phonend;
118+
auto altsurfstart = outstring.find("\"<", 3);
119+
auto altsurfend = outstring.find(">\"", 3);
120+
if ((altsurfstart != std::string::npos) &&
121+
(altsurfend != std::string::npos)) {
122+
surf = outstring.substr(altsurfstart + 2,
123+
altsurfend - altsurfstart - 2);
124+
if(verbose) {
125+
std::cout << "Using re-analysed surface form: " << surf <<
126+
std::endl;
127+
}
128+
} else if (phonstart != std::string::npos) {
129+
phonstart = outstring.rfind("\"", phonend - 1);
130+
surf = outstring.substr(phonstart + 1, phonend - phonstart - 1);
131+
outstring = outstring.replace(phonstart, phonend, "");
132+
if (verbose) {
133+
std::cout << "Using Phon(?): " << surf <<
134+
std::endl;
135+
}
136+
} else if (midtstart != std::string::npos) {
137+
midtstart = outstring.rfind("\"", midtend - 1);
138+
surf = outstring.substr(midtstart + 1, midtend - midtstart - 1);
139+
outstring = outstring.replace(midtstart, midtend, "");
140+
if (verbose) {
141+
std::cout << "Using MIDTAPE: " << surf <<
142+
std::endl;
143+
}
144+
} else if (verbose) {
145+
std::cout << "Using surf: " << surf << std::endl;
146+
}
112147
if (expand) {
113148
// 1. apply expansions from normaliser
114149
if (verbose) {

0 commit comments

Comments
 (0)