Skip to content

Commit 68a05cb

Browse files
Ko van der SlootKo van der Sloot
authored and committed
cleanup the API, and added more tests
1 parent f94e650 commit 68a05cb

File tree

3 files changed

+96
-40
lines changed

3 files changed

+96
-40
lines changed

include/frog/FrogAPI.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,15 @@ class FrogAPI {
108108
~FrogAPI();
109109
static std::string defaultConfigDir( const std::string& ="" );
110110
static std::string defaultConfigFile( const std::string& ="" );
111-
void FrogFile( const std::string&, std::ostream&, const std::string& );
111+
folia::Document *FrogFile( const std::string&, std::ostream& );
112112
void FrogServer( Sockets::ServerSocket &conn );
113113
void FrogInteractive();
114114
frog_data frog_sentence( std::vector<Tokenizer::Token>&,
115115
const size_t );
116+
std::string Frogtostring( const std::string& );
117+
std::string Frogtostringfromfile( const std::string& );
118+
119+
private:
116120
folia::Document *run_folia_engine( const std::string&,
117121
std::ostream& );
118122
folia::Document *run_text_engine( const std::string&,
@@ -122,10 +126,6 @@ class FrogAPI {
122126
folia::FoliaElement *append_to_folia( folia::FoliaElement *,
123127
const frog_data&,
124128
unsigned int& ) const;
125-
std::string Frogtostring( const std::string& );
126-
std::string Frogtostringfromfile( const std::string& );
127-
128-
private:
129129
void add_ner_result( folia::Sentence *,
130130
const frog_data&,
131131
const std::vector<folia::Word*>& ) const;

src/Frog.cxx

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,7 @@ int main(int argc, char *argv[]) {
618618
outS = new ofstream( outputFileName );
619619
}
620620
if ( fileNames.size() > 1 ){
621-
LOG << "start procesessing " << fileNames.size() << " files..." << endl;
621+
LOG << "start processing " << fileNames.size() << " files..." << endl;
622622
}
623623
for ( auto const& name : fileNames ){
624624
string testName = testDirName + name;
@@ -688,12 +688,11 @@ int main(int argc, char *argv[]) {
688688
LOG << "running some extra Frog tests...." << endl;
689689
if ( testName.find( ".xml" ) != string::npos ){
690690
options.doXMLin = true;
691+
options.doXMLout = true;
691692
}
692693
else {
693694
options.doXMLin = false;
694-
}
695-
if ( !xmlOutName.empty() && xmlOutName.find( ".xml" ) != string::npos ){
696-
options.doXMLout = true;
695+
options.doXMLout = false;
697696
}
698697
LOG << "Start test: " << testName << endl;
699698
stringstream ss;
@@ -715,16 +714,84 @@ int main(int argc, char *argv[]) {
715714
LOG << "test OK!" << endl;
716715
}
717716
LOG << "Done with:" << testName << endl;
717+
718+
//
719+
// also test FoLiA in and text out
720+
{
721+
if ( testName.find( ".xml" ) != string::npos ){
722+
options.doXMLin = true;
723+
options.doXMLout = false;
724+
}
725+
LOG << "Start test: " << testName << endl;
726+
stringstream ss;
727+
ifstream is( testName );
728+
string line;
729+
while ( getline( is, line ) ){
730+
ss << line << endl;
731+
}
732+
string s1 = frog.Frogtostring( ss.str() );
733+
*outS << "STRING 1 " << endl;
734+
*outS << s1 << endl;
735+
string s2 = frog.Frogtostringfromfile( testName );
736+
*outS << "STRING 2 " << endl;
737+
*outS << s2 << endl;
738+
if ( s1 != s2 ){
739+
LOG << "FAILED test :" << testName << endl;
740+
}
741+
else {
742+
LOG << "test OK!" << endl;
743+
}
744+
LOG << "Done with:" << testName << endl;
745+
}
746+
//
747+
// and even text in and FoLiA out
748+
{
749+
if ( testName.find( ".xml" ) == string::npos ){
750+
options.doXMLin = false;
751+
options.doXMLout = true;
752+
}
753+
LOG << "Start test: " << testName << endl;
754+
stringstream ss;
755+
ifstream is( testName );
756+
string line;
757+
while ( getline( is, line ) ){
758+
ss << line << endl;
759+
}
760+
options.docid = "test";
761+
string s1 = frog.Frogtostring( ss.str() );
762+
*outS << "STRING 1 " << endl;
763+
*outS << s1 << endl;
764+
string s2 = frog.Frogtostringfromfile( testName );
765+
*outS << "STRING 2 " << endl;
766+
*outS << s2 << endl;
767+
if ( s1 != s2 ){
768+
LOG << "FAILED test :" << testName << endl;
769+
}
770+
else {
771+
LOG << "test OK!" << endl;
772+
}
773+
LOG << "Done with:" << testName << endl;
774+
}
718775
}
719776
else {
777+
folia::Document *result = 0;
720778
try {
721-
frog.FrogFile( testName, *outS, xmlOutName );
779+
result = frog.FrogFile( testName, *outS );
722780
}
723781
catch ( exception& e ){
724782
LOG << "problem frogging: " << name << endl
725783
<< e.what() << endl;
726784
continue;
727785
}
786+
if ( !xmlOutName.empty() ){
787+
if ( !result ){
788+
LOG << "FAILED to create FoLiA??" << endl;
789+
}
790+
else {
791+
result->save( xmlOutName );
792+
LOG << "FoLiA stored in " << xmlOutName << endl;
793+
}
794+
}
728795
if ( !outName.empty() ){
729796
LOG << "results stored in " << outName << endl;
730797
if ( outS != &cout ){

src/FrogAPI.cxx

Lines changed: 19 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -969,24 +969,32 @@ string FrogAPI::Frogtostring( const string& s ){
969969
if ( s.empty() ){
970970
return s;
971971
}
972-
options.hide_timers = true;
973972
string tmp_file = tmpnam(0);
974973
ofstream os( tmp_file );
975974
os << s << endl;
976975
os.close();
977-
stringstream ss;
978-
FrogFile( tmp_file, ss, "" );
979-
return ss.str();
976+
return Frogtostringfromfile( tmp_file );
980977
}
981978

982-
string FrogAPI::Frogtostringfromfile( const string& name ){
979+
string FrogAPI::Frogtostringfromfile( const string& infilename ){
983980
/// Parse a file, Frog it and return the result as a string.
984981
/// @infilename: The filename.
985982
/// @return the results of frogging. Depending on the current frog settings
986983
/// the input file can be interpreted as XML, and the output will be XML or
987984
/// tab separated
985+
options.hide_timers = true;
986+
bool old_val = options.noStdOut;
988987
stringstream ss;
989-
FrogFile( name, ss, "" );
988+
if ( options.doXMLout ){
989+
options.noStdOut = true;
990+
}
991+
folia::Document *result = FrogFile( infilename, ss );
992+
options.noStdOut = old_val;
993+
if ( result ){
994+
result->set_kanon( options.doKanon );
995+
ss << result;
996+
delete result;
997+
}
990998
return ss.str();
991999
}
9921000

@@ -1665,7 +1673,7 @@ folia::Document *FrogAPI::run_folia_engine( const string& infilename,
16651673
return 0;
16661674
}
16671675
if ( options.doXMLout ){
1668-
return engine.doc(true);
1676+
return engine.doc(true); //disconnect from the engine!
16691677
}
16701678
return 0;
16711679
}
@@ -1707,43 +1715,23 @@ folia::Document *FrogAPI::run_text_engine( const string& infilename,
17071715
return doc;
17081716
}
17091717

1710-
void FrogAPI::FrogFile( const string& infilename,
1711-
ostream& os,
1712-
const string& xmlOutF ) {
1718+
folia::Document *FrogAPI::FrogFile( const string& infilename,
1719+
ostream& os ){
1720+
folia::Document *result = 0;
17131721
bool xml_in = options.doXMLin;
17141722
if ( TiCC::match_back( infilename, ".xml.gz" )
17151723
|| TiCC::match_back( infilename, ".xml.bz2" )
17161724
|| TiCC::match_back( infilename, ".xml" ) ){
17171725
// auto detect (compressed) xml.
17181726
xml_in = true;
17191727
}
1720-
string xmlOutFile = xmlOutF;
17211728
timers.reset();
1722-
folia::Document *result = 0;
17231729
if ( xml_in ){
1724-
// when the inputfile is .bz2 or .gz, we use the same compression on output
1725-
if ( !xmlOutFile.empty() ){
1726-
if ( TiCC::match_back( infilename, ".gz" ) ){
1727-
if ( !TiCC::match_back( xmlOutFile, ".gz" ) ){
1728-
xmlOutFile += ".gz";
1729-
}
1730-
}
1731-
else if ( TiCC::match_back( infilename, ".bz2" ) ){
1732-
if ( !TiCC::match_back( xmlOutFile, ".bz2" ) ){
1733-
xmlOutFile += ".bz2";
1734-
}
1735-
}
1736-
}
17371730
result = run_folia_engine( infilename, os );
17381731
}
17391732
else {
17401733
result = run_text_engine( infilename, os );
17411734
}
1742-
if ( result ){
1743-
result->save( xmlOutFile, options.doKanon );
1744-
LOG << "resulting FoLiA doc saved in " << xmlOutFile << endl;
1745-
delete result;
1746-
}
17471735
if ( !options.hide_timers ){
17481736
LOG << "tokenisation took: " << timers.tokTimer << endl;
17491737
LOG << "CGN tagging took: " << timers.tagTimer << endl;
@@ -1772,6 +1760,7 @@ void FrogAPI::FrogFile( const string& infilename,
17721760
}
17731761
LOG << "Frogging in total took: " << timers.frogTimer + timers.tokTimer << endl;
17741762
}
1763+
return result;
17751764
}
17761765

17771766
// the functions below here are ONLY used by TSCAN.

0 commit comments

Comments
 (0)