From aaf703e999070bf9ed654cf3d19afc47ffb57db8 Mon Sep 17 00:00:00 2001 From: Rachid Yahiaoui Date: Wed, 12 Jul 2017 12:55:04 +0200 Subject: [PATCH] Add Filter CSV --- src/main/java/entypoint/Main.java | 130 +++++++++++------- .../java/org/inra/yedgen/csv/CsvFilter.java | 121 ++++++++++++++++ .../org/inra/yedgen/processor/Processor.java | 93 +++++++++---- .../org/inra/yedgen/processor/io/Writer.java | 2 +- 4 files changed, 264 insertions(+), 82 deletions(-) create mode 100644 src/main/java/org/inra/yedgen/csv/CsvFilter.java diff --git a/src/main/java/entypoint/Main.java b/src/main/java/entypoint/Main.java index b31e8ef..e4fa840 100644 --- a/src/main/java/entypoint/Main.java +++ b/src/main/java/entypoint/Main.java @@ -2,6 +2,9 @@ package entypoint ; import java.io.File ; +import java.util.List ; +import java.util.Arrays ; +import java.util.stream.Collectors ; import org.inra.yedgen.processor.Processor ; /** @@ -36,46 +39,54 @@ public static void main (String[] args) throws Exception { String classe = null ; int column = -1 ; String prefixFile = null , connecFile = null , def_prefix = null ; + Integer matchColumn = null ; String _matchWord = null ; + boolean includingGraphVariables = false , verbose = false ; int nbParams = 0 ; for ( int i = 0 ; i < args.length ; i++ ) { - String token = args[i] ; + String token = args[i] ; switch(token) { - case "-d" : directory = args[i+1] ; nbParams += 2 ; - break ; - case "-out" : outFile = args[i+1] ; nbParams += 2 ; - break ; - case "-ext" : ext = args[i+1] ; nbParams += 2 ; - break ; - case "-csv" : csv = args[i+1] ; nbParams += 2 ; - break ; - case "-prf" : prf = args[i+1] ; nbParams += 2 ; - break ; - case "-js" : js = args[i+1] ; nbParams += 2 ; - break ; - case "-class" : classe = args[i+1] ; nbParams += 2 ; - break ; - case "-column" : column = Integer.parseInt(args [ i+1 ] - .replaceAll(" +", "")) ; - nbParams += 2 ; - break ; - case "-ig" : includingGraphVariables = true ; - nbParams += 1 ; - break ; - case "-v" : verbose = true ; - nbParams += 1 ; - break ; - case "-def_prefix" : def_prefix = args[i+1] ; nbParams += 2 ; - break ; - case "-connecFile" : connecFile = args[i+1] ; nbParams += 2 ; - break ; - case "-prefixFile" : prefixFile = args[i+1] ; nbParams += 2 ; - break ; + case "-d" : directory = args[i+1] ; nbParams += 2 ; + break ; + case "-out" : outFile = args[i+1] ; nbParams += 2 ; + break ; + case "-ext" : ext = args[i+1] ; nbParams += 2 ; + break ; + case "-csv" : csv = args[i+1] ; nbParams += 2 ; + break ; + case "-prf" : prf = args[i+1] ; nbParams += 2 ; + break ; + case "-js" : js = args[i+1] ; nbParams += 2 ; + break ; + case "-class" : classe = args[i+1] ; nbParams += 2 ; + break ; + case "-column" : column = Integer.parseInt(args [ i+1 ] + .replaceAll(" +", "")) ; + nbParams += 2 ; + break ; + case "-ig" : includingGraphVariables = true ; + nbParams += 1 ; + break ; + case "-v" : verbose = true ; + nbParams += 1 ; + break ; + case "-def_prefix" : def_prefix = args[i+1] ; nbParams += 2 ; + break ; + case "-connecFile" : connecFile = args[i+1] ; nbParams += 2 ; + break ; + case "-prefixFile" : prefixFile = args[i+1] ; nbParams += 2 ; + break ; + case "-matchWord" : _matchWord = args[i+1] ; nbParams += 2 ; + break ; + case "-matchColumn" : matchColumn = Integer.parseInt(args [ i+1 ] + .replaceAll(" +", "")) ; + nbParams += 2 ; + break ; } } @@ -88,35 +99,50 @@ public static void main (String[] args) throws Exception { if( directory == null || directory.isEmpty() || outFile == null || outFile.isEmpty()) { - System.out.println (" directory or outFile is Empty " ) ; + System.out.println (" directory or outFile is Empty " ) ; return ; } - if(ext == null || ext.length() == 0 ) ext = ".graphml" ; + if(ext == null || ext.length() == 0 ) ext = ".graphml" ; + + List wordList = null ; + + if( _matchWord != null && ! _matchWord.isEmpty() ) { - long startTime = System.currentTimeMillis() ; + wordList = Arrays.asList( _matchWord.trim() + .replaceAll(" +", " ") + .split(",")) + .stream() + .map( word -> word.trim() ) + .collect(Collectors.toList()) ; + } + + long startTime = System.currentTimeMillis() ; - Processor processor = new Processor ( directory , - ext , - prf , - js , - connecFile , - prefixFile , - def_prefix ) ; + Processor processor = new Processor ( directory , + ext , + prf , + js , + connecFile , + prefixFile , + def_prefix ) ; - processor.process ( outFile , - csv , - includingGraphVariables , - classe , - column , - verbose ) ; + processor.process ( outFile , + csv , + includingGraphVariables , + classe , + column , + matchColumn , + wordList , + verbose ) ; - long executionTime = System.currentTimeMillis() - startTime ; + long executionTime = System.currentTimeMillis() - startTime ; - System.out.println(" Elapsed seconds : " + - executionTime / 1000 + " s" ) ; + System.out.println(" Elapsed seconds : " + + executionTime / 1000 + " s" ) ; - System.out.println(" ") ; - } + System.out.println(" ") ; + + } } diff --git a/src/main/java/org/inra/yedgen/csv/CsvFilter.java b/src/main/java/org/inra/yedgen/csv/CsvFilter.java new file mode 100644 index 0000000..d9fdedd --- /dev/null +++ b/src/main/java/org/inra/yedgen/csv/CsvFilter.java @@ -0,0 +1,121 @@ + +package org.inra.yedgen.csv ; + +import java.util.List ; +import java.util.Arrays ; +import java.util.Objects ; +import java.nio.file.Files ; +import java.nio.file.Paths ; +import java.util.ArrayList ; +import java.util.stream.Stream ; +import java.util.logging.Level ; +import java.util.logging.Logger ; +import java.util.stream.Collectors ; +import org.inra.yedgen.processor.io.Writer ; + +/** + * + * @author ryahiaoui + */ + +public class CsvFilter { + + + public static void main (String[] args) throws Exception { + + String csvFile = null ; + String outCsv = null ; + Integer matchColumn = null ; + String matchWord = null ; + String separator = null ; + + for ( int i = 0 ; i < args.length ; i++ ) { + + String token = args[i] ; + + switch(token) { + + case "-csv" : csvFile = args[i+1] ; + break ; + case "-outCsv" : outCsv = args[i+1] ; + break ; + case "-matchWord" : matchWord = args[i+1] ; + break ; + case "-matchColumn" : matchColumn = Integer.parseInt(args [ i+1 ] + .replaceAll(" +", "")) ; + break ; + case "-separator" : separator = args[i+1] ; + break ; + } + } + + Objects.requireNonNull( csvFile ) ; + Objects.requireNonNull( outCsv ) ; + + List wordList = null ; + + if( matchWord != null && ! matchWord.isEmpty() ) { + + wordList = Arrays.asList( matchWord.trim() + .replaceAll(" +", " ") + .split(",")) + .stream() + .map( word -> word.trim() ) + .collect(Collectors.toList()) ; + } + + /* Read File and Filter */ + + String _outCsv = outCsv ; + Integer _matchColumn = matchColumn ; + List _wordList = wordList ; + String _separator = separator ; + + + System.out.println(" " ) ; + System.out.println(" ********************************** " ) ; + System.out.println(" - Input CSV File : " + _outCsv ) ; + System.out.println(" - scv_separator : " + _separator ) ; + System.out.println(" - matchColumn : " + _matchColumn ) ; + System.out.println(" - wordList : " + _wordList ) ; + System.out.println(" ********************************** " ) ; + System.out.println(" " ) ; + + + List outLines = new ArrayList<>() ; + + outLines.add( Files.lines(Paths.get(csvFile)).findFirst().get()) ; + + try ( Stream lines = Files.lines(Paths.get(csvFile)).skip(1)) { + + lines.forEach ( line -> { + + if( _matchColumn != null && _matchColumn > 0 && + _wordList != null && ! _wordList.isEmpty() ) { + + if( _wordList.contains( line.split(_separator)[ _matchColumn ] + .trim() + .replaceAll(" +", " "))) { + outLines.add(line ) ; + } + } + }) ; + + } catch (Exception ex) { + Logger.getLogger(CsvFilter.class.getName()).log(Level.SEVERE, null, ex) ; + } + + Writer.checkFile( _outCsv ) ; + Writer.writeTextFile( outLines , _outCsv) ; + + if( outLines.size() == 1 ) { + System.out.println(" -> Empty CSV File Generated ") ; + System.out.println(" ") ; + } + else { + System.out.println(" -> CSV File Generated at : " + _outCsv ) ; + System.out.println(" ") ; + } + } + +} diff --git a/src/main/java/org/inra/yedgen/processor/Processor.java b/src/main/java/org/inra/yedgen/processor/Processor.java index f56238d..1564dda 100644 --- a/src/main/java/org/inra/yedgen/processor/Processor.java +++ b/src/main/java/org/inra/yedgen/processor/Processor.java @@ -56,22 +56,22 @@ public class Processor { private boolean verbose ; - public Processor( String directory , - String extensionFile , - String propertieFile , - String jsFile , - String connecFile , - String prefixFile , - String default_prefix ) throws Exception { + public Processor( String directory , + String extensionFile , + String propertieFile , + String jsFile , + String connecFile , + String prefixFile , + String default_prefix ) throws Exception { - this.graphExtractor = new GraphExtractor () ; + this.graphExtractor = new GraphExtractor () ; - graphExtractor.genGraphPopulatingManagers( directory , extensionFile ) ; + graphExtractor.genGraphPopulatingManagers( directory , extensionFile ) ; if(default_prefix != null ) GraphExtractor.PREFIX_PREDICAT = default_prefix ; /* Add External Prefixs if prefixFile not null */ - updateConnection( connecFile, this.graphExtractor.getSourceDeclaration()) ; + updateConnection( connecFile, this.graphExtractor.getSourceDeclaration()) ; /* Add External Prefixs if prefixFile not null */ updatePrefixs(prefixFile, this.graphExtractor.getPrefixs()) ; @@ -141,12 +141,19 @@ private ManagerNode instantiateManagerNode ( ManagerEdge managerEdge , FactoryNo } - public boolean processFull ( String outputFile , - String csvFile , - String classe , - int column ) { + public boolean processFull ( String outputFile , + String csvFile , + String classe , + int column , + Integer matchColumn , + List matchWord ) { - boolean processCSV = processOnlyCSV( outputFile, csvFile, classe, column ) ; + boolean processCSV = processOnlyCSV ( outputFile , + csvFile , + classe , + column , + matchColumn , + matchWord ) ; boolean processVariables = processOnlyGraphVariables( outputFile ) ; @@ -210,10 +217,12 @@ public boolean processOnlyGraphVariables ( String outputFile ) { } - public boolean processOnlyCSV ( String outputFile , - String csvFile , - String classe , - int column ) { + public boolean processOnlyCSV ( String outputFile , + String csvFile , + String classe , + int column , + Integer matchColumn , + List matchWord ) { Messages.printMessageStartProcessCsvVariableGeneration( csvFile ) ; @@ -262,6 +271,19 @@ public void accept(String line) { return ; } } + + if( matchColumn != null && matchColumn > 0 && + matchWord != null && ! matchWord.isEmpty() ) { + + if( ! matchWord.contains( line.split( metaPatternManager + .getCSV_SEPARATOR())[ matchColumn ] + .trim() + .replaceAll(" +", " "))) { + counter ++ ; + + return ; + } + } try { @@ -380,23 +402,36 @@ public boolean processOnlyGraphWithoutVariables ( String outputFile ) { } - public void process ( String outputFile , - String csvFile , - boolean includingGraphVariables , - String classe , - int column , - boolean verbose ) { + public void process ( String outputFile , + String csvFile , + boolean includingGraphVariables , + String classe , + int column , + Integer matchColumn , + List matchWord , + boolean verbose ) { this.verbose = verbose ; boolean process = false ; if( includingGraphVariables && csvFile != null ) { - process = processFull( outputFile, csvFile, classe, column ) ; + + process = processFull( outputFile , + csvFile , + classe , + column , + matchColumn , + matchWord ) ; } - else if ( ! includingGraphVariables && - csvFile != null ) { - process = processOnlyCSV( outputFile, csvFile, classe, column ) ; + else if ( ! includingGraphVariables && csvFile != null ) { + + process = processOnlyCSV( outputFile , + csvFile , + classe , + column , + matchColumn , + matchWord ) ; } else { process = processOnlyGraphVariables( outputFile ) ; diff --git a/src/main/java/org/inra/yedgen/processor/io/Writer.java b/src/main/java/org/inra/yedgen/processor/io/Writer.java index e9b323e..20955ca 100644 --- a/src/main/java/org/inra/yedgen/processor/io/Writer.java +++ b/src/main/java/org/inra/yedgen/processor/io/Writer.java @@ -86,6 +86,6 @@ private static void deleteFile( String path ) throws IOException { private static void createFile( String path ) throws IOException { File file = new File(path) ; - file.createNewFile() ; + file.createNewFile() ; } }