Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introducing RequiredFileNamePatterns feature to read files depending on the existence of other files #2099

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public final class VFSConstants {

public static final String TRANSPORT_FILE_FILE_URI = "transport.vfs.FileURI";
public static final String TRANSPORT_FILE_FILE_NAME_PATTERN = "transport.vfs.FileNamePattern";
/**
 * Name of the optional parameter holding the comma separated required file name patterns.
 * A pattern may contain placeholders such as $1 that refer to capture groups of the
 * expression configured under {@link #TRANSPORT_FILE_FILE_NAME_PATTERN}.
 */
public static final String TRANSPORT_FILE_REQUIRED_FILE_NAME_PATTERNS = "transport.vfs.RequiredFileNamePatterns";
public static final String TRANSPORT_FILE_CONTENT_TYPE = "transport.vfs.ContentType";
public static final String TRANSPORT_FILE_LOCKING = "transport.vfs.Locking";
public static final String UPDATE_LAST_MODIFIED = "transport.vfs.UpdateLastModified";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,11 @@
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
Expand Down Expand Up @@ -747,6 +749,100 @@ public static String resolveUriHost (String uri, StringBuilder strBuilder)
return uri;
}

/**
 * Resolves the group placeholders of the required file name patterns against a concrete file name.
 * <p>
 * {@code fileNamePattern} is applied to {@code fileBaseName}. Every placeholder {@code $n} found in
 * the comma separated {@code requiredFileNamePatterns} is then replaced by the text captured by
 * group {@code n}. Example: base name {@code order1.xml}, file name pattern {@code (.*)[.]xml} and
 * required patterns {@code $1[.]hl7, $1[.]pdf} yield {@code order1[.]hl7} and {@code order1[.]pdf}.
 * <p>
 * Entries are trimmed and blank entries are dropped. If the file name pattern does not match the
 * base name (or either of them is {@code null}) the entries are returned with their placeholders
 * untouched; such an entry cannot match a real file name, so the file is treated as incomplete
 * instead of failing with an exception.
 * <p>
 * NOTE(review): captured text is inserted verbatim, so regex meta characters contained in the file
 * name keep their special meaning in the returned expressions. Because the list is split on
 * {@code ','}, a single pattern cannot contain a comma.
 *
 * @param fileBaseName base name of the file whose captured groups provide the replacement values
 * @param fileNamePattern regular expression describing the allowed file names
 * @param requiredFileNamePatterns comma separated patterns whose placeholders are to be resolved
 * @return the resolved patterns in configuration order; empty if no patterns are configured
 * @throws java.util.regex.PatternSyntaxException if {@code fileNamePattern} is not a valid expression
 */
public static List<String> buildPatternReplaced(String fileBaseName, String fileNamePattern, String requiredFileNamePatterns) {
    List<String> result = new ArrayList<String>();
    if (requiredFileNamePatterns == null || requiredFileNamePatterns.isEmpty()) {
        return result;
    }

    // Run the file name pattern against the base name to obtain the captured groups.
    // The matcher is only kept when a match was actually found, because group(i) is illegal otherwise.
    Matcher matcher = null;
    if (fileBaseName != null && fileNamePattern != null) {
        Matcher candidate = Pattern.compile(fileNamePattern).matcher(fileBaseName);
        if (candidate.find()) {
            matcher = candidate;
        }
    }

    for (String requiredPattern : requiredFileNamePatterns.split(",")) {
        // "$1[.]hl7, $1[.]pdf" is a natural way to write the list, so surrounding blanks are ignored
        String resolved = requiredPattern.trim();
        if (resolved.isEmpty()) {
            continue;
        }
        if (matcher != null) {
            // Highest group first, otherwise "$1" would eat the leading part of "$10"
            for (int i = matcher.groupCount(); i >= 1; i--) {
                String placeholder = "$" + i;
                if (resolved.contains(placeholder)) {
                    // A group that did not take part in the match yields null
                    String captured = matcher.group(i);
                    resolved = resolved.replace(placeholder, captured == null ? "" : captured);
                }
            }
        }
        result.add(resolved);
    }
    return result;
}

/**
 * Tells whether at least one of the given patterns has no matching file among the given files.
 * <p>
 * A pattern counts as satisfied as soon as the base name of one file matches it completely.
 * A {@code null} pattern list means there is nothing to require, so nothing can be missing.
 * A {@code null} file array is treated like an empty directory.
 *
 * @param requiredPatternReplacedList regular expressions that each need a matching file; may be {@code null}
 * @param initialChildren files whose base names are tested against the patterns; may be {@code null}
 * @return {@code true} if some pattern is matched by none of the files, {@code false} if every
 *         pattern has at least one matching file
 * @throws java.util.regex.PatternSyntaxException if one of the patterns is not a valid expression
 */
public static boolean hasRequiredPatternsMissing(List<String> requiredPatternReplacedList, FileObject[] initialChildren) {
    if (requiredPatternReplacedList == null) {
        return false;
    }

    for (String requiredPatternReplaced : requiredPatternReplacedList) {
        // Compile once per pattern instead of once per file (String.matches recompiles every time)
        Pattern requiredPattern = Pattern.compile(requiredPatternReplaced);
        boolean found = false;
        if (initialChildren != null) {
            for (FileObject childInList : initialChildren) {
                String childInListFilename = childInList.getName().getBaseName();
                if (requiredPattern.matcher(childInListFilename).matches()) {
                    found = true;
                    break;
                }
            }
        }
        // The first pattern without a matching file settles the question
        if (!found) {
            return true;
        }
    }

    // every required pattern has a matching file
    return false;
}

/**
 * Returns the first pattern that matches a given file name, provided a file with exactly that
 * base name is contained in the given array of files.
 *
 * @param requiredPatternReplacedList regular expressions to test, in order of preference; may be {@code null}
 * @param initialChildren files among which {@code referenceName} has to be present; may be {@code null}
 * @param referenceName base name of the file for which a pattern is searched
 * @return the first pattern matching {@code referenceName}, or {@code null} if the name is not
 *         among the files or no pattern matches it
 * @throws java.util.regex.PatternSyntaxException if a tested pattern is not a valid expression
 */
public static String getMatchedPattern(List<String> requiredPatternReplacedList, FileObject[] initialChildren, String referenceName) {
    if (requiredPatternReplacedList == null || initialChildren == null || referenceName == null) {
        return null;
    }

    // Only a child named exactly like referenceName can produce a result, so look for it once
    boolean referencePresent = false;
    for (FileObject childInList : initialChildren) {
        if (referenceName.equals(childInList.getName().getBaseName())) {
            referencePresent = true;
            break;
        }
    }
    if (!referencePresent) {
        return null;
    }

    // With the file known to exist, the answer is the first pattern matching its name
    for (String requiredPatternReplaced : requiredPatternReplacedList) {
        if (referenceName.matches(requiredPatternReplaced)) {
            return requiredPatternReplaced;
        }
    }
    return null;
}


/**
* Extracts the hostname from a URI. The scheme://userinfo@ part has
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ public class PollTableEntry extends AbstractPollTableEntry {
private String replyFileURI;
/** file name pattern for a directory or compressed file entry */
private String fileNamePattern;
/**
 * Comma separated file name patterns that must each have a matching file before a file is
 * processed; the patterns are also used to order the processing of the matching files
 */
private String requiredFileNamePatterns;
/** Content-Type to use for the message */
private String contentType;

Expand Down Expand Up @@ -179,6 +181,14 @@ public String getFileNamePattern() {
return fileNamePattern;
}

/**
 * Returns the required file name patterns exactly as they are stored for this entry.
 *
 * @return the stored value; may be {@code null} or empty, see {@link #hasRequiredFileNamePatterns()}
 */
public String getRequiredFileNamePatterns() {
    return this.requiredFileNamePatterns;
}

/**
 * Tells whether required file name patterns are set for this entry.
 *
 * @return {@code true} if the stored value is neither {@code null} nor the empty string
 */
public boolean hasRequiredFileNamePatterns() {
    if (requiredFileNamePatterns == null) {
        return false;
    }
    return requiredFileNamePatterns.length() > 0;
}

public String getContentType() {
return contentType;
}
Expand Down Expand Up @@ -532,6 +542,9 @@ protected boolean loadConfigurationsFromService(ParameterInclude params) throws
fileNamePattern = ParamUtils.getOptionalParam(params,
VFSConstants.TRANSPORT_FILE_FILE_NAME_PATTERN);

requiredFileNamePatterns = ParamUtils.getOptionalParam(params,
VFSConstants.TRANSPORT_FILE_REQUIRED_FILE_NAME_PATTERNS);

contentType = ParamUtils.getRequiredParam(params,
VFSConstants.TRANSPORT_FILE_CONTENT_TYPE);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
Expand All @@ -72,6 +73,8 @@
import java.util.Map;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.mail.internet.ContentType;
import javax.mail.internet.ParseException;

Expand Down Expand Up @@ -417,7 +420,13 @@ protected void scanFileOrDirectory(final PollTableEntry entry, String fileURI) {
Arrays.sort(children, new FileLastmodifiedtimestampDesComparator());
}
log.debug("End Sorting the files.");
}
}

// sorting by RequiredFileNamePatterns (if specified)
if(entry.hasRequiredFileNamePatterns() && children.length > 1){
Arrays.sort(children, new RequiredFileNamesComparator(entry.getFileNamePattern(), entry.getRequiredFileNamePatterns()));
}

for (FileObject child : children) {
// Stop processing any further when put to maintenance mode (shutting down or restarting)
// Stop processing when service get undeployed
Expand Down Expand Up @@ -454,6 +463,24 @@ protected void scanFileOrDirectory(final PollTableEntry entry, String fileURI) {

if(entry.getFileNamePattern()!=null &&
child.getName().getBaseName().matches(entry.getFileNamePattern())){

//Check if RequiredFileNamePatterns is specified
if(entry.hasRequiredFileNamePatterns()){
//Commit a complete list of children
String childBaseName = child.getName().getBaseName();
List<String> requiredPatternReplacedList =
VFSUtils.buildPatternReplaced(childBaseName, entry.getFileNamePattern(), entry.getRequiredFileNamePatterns());

boolean hasPatternMissing =
VFSUtils.hasRequiredPatternsMissing(requiredPatternReplacedList, children);

if(hasPatternMissing) {
//Do not consume this file
log.debug("Not all required files exist for transferring file ["+VFSUtils.maskURLPassword(child.getName().getBaseName()) +"]. Required patterns: "+entry.getRequiredFileNamePatterns());
continue;
}
}

//child's file name matches the file name pattern
//now we try to get the lock and process
if (log.isDebugEnabled()) {
Expand Down Expand Up @@ -1195,4 +1222,74 @@ public int compare(FileObject o1, FileObject o2) {
return lDiff.intValue();
}
}

class RequiredFileNamesComparator implements Comparator<FileObject> {
String fileNamePattern = null;
String requiredFileNamePatterns = null;

public RequiredFileNamesComparator(String fileNamePattern, String requiredFileNamePatterns){
this.fileNamePattern = fileNamePattern;
this.requiredFileNamePatterns = requiredFileNamePatterns;
}

@Override
public int compare(FileObject o1, FileObject o2) {
//compairs the string of both file names by ignoring the "variable" part
// ex. abc-file1.hl7 and def-file1.pdf with regex [a-z]$1[.]pdf,[a-z]$1[.]hl7 will compair only the "file1" string to sort the files
/**
* complex example:
Regex: [a-z]+[-]$1[-][1-9]+[-]$2[.]txt,[a-z]+[-]$2[-][1-9]+[-]$1[.]hl7
INPUT files in folder
aaa-testFileA-12345-date1.txt
bbb-testFileA-2344-date2.txt
ccc-testFileB-45345-date1.txt
ddd-date1-32423-testFileA.hl7
eee-date1-123-testFileB.hl7
fff-date2-2342-testFileA.hl7

SORTED files list
aaa-testFileA-12345-date1.txt
ddd-date1-32423-testFileA.hl7
bbb-testFileA-2344-date2.txt
fff-date2-2342-testFileA.hl7
ccc-testFileB-45345-date1.txt
eee-date1-123-testFileB.hl7

*/
String fileName1 = removeFixedPatterns(o1, requiredFileNamePatterns);
String fileName2 = removeFixedPatterns(o2, requiredFileNamePatterns);
return fileName1.compareTo(fileName2);
}

private String removeFixedPatterns(FileObject child, String requiredFileNamePatterns) {
//get base name of file
String baseName = child.getName().getBaseName();
//separate the patterns
String[] patterns = requiredFileNamePatterns.split(",");
for(int i=0;i<patterns.length;i++){
String fixedPattern = patterns[i];
ArrayList<String> groups = new ArrayList<>();
//extract the $1 to $9 (max 9 groups can be used for now) and replace them with regex named groups
for (int j = 0; j <= 9; j++) {
if ( fixedPattern.matches(".*[$]["+j+"].*")) {
fixedPattern = fixedPattern.replaceAll("[$]["+j+"]", "(?<grp"+j+">.*)");
groups.add("grp"+j);
}
}
Pattern pattern = Pattern.compile(fixedPattern);
Matcher matcher = pattern.matcher(baseName);
//match the "fixed" expression with named groups
if(matcher.find()) {
String relevantname = "";
//reconstruct the filename that matters with the regex groups ($1-$9)
for (String groupName: groups) {
relevantname+=matcher.group(groupName);
}
Comment on lines +1285 to +1287
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we format the code segments? It seems both tabs and spaces are used. Let's use only spaces and format the code. Please check other places as well.

//add the index of pattern at the end of the name, used to ensure that the first pattern is the first file read
return relevantname+i;
}
}
return "";
}
}
}