From 176581ba05c2f936af4a14d6273f784d1ee0dc25 Mon Sep 17 00:00:00 2001 From: Oliver Hepp Date: Wed, 13 Sep 2023 10:47:49 +0200 Subject: [PATCH] WSO2-214: Implemented RequiredFileNamePatterns feature --- .../synapse/commons/vfs/VFSConstants.java | 1 + .../apache/synapse/commons/vfs/VFSUtils.java | 96 ++++++++++++++++++ .../synapse/transport/vfs/PollTableEntry.java | 13 +++ .../transport/vfs/VFSTransportListener.java | 99 ++++++++++++++++++- 4 files changed, 208 insertions(+), 1 deletion(-) diff --git a/modules/commons/src/main/java/org/apache/synapse/commons/vfs/VFSConstants.java b/modules/commons/src/main/java/org/apache/synapse/commons/vfs/VFSConstants.java index 764162f69e..7d6b703fe5 100644 --- a/modules/commons/src/main/java/org/apache/synapse/commons/vfs/VFSConstants.java +++ b/modules/commons/src/main/java/org/apache/synapse/commons/vfs/VFSConstants.java @@ -40,6 +40,7 @@ public final class VFSConstants { public static final String TRANSPORT_FILE_FILE_URI = "transport.vfs.FileURI"; public static final String TRANSPORT_FILE_FILE_NAME_PATTERN = "transport.vfs.FileNamePattern"; + public static final String TRANSPORT_FILE_REQUIRED_FILE_NAME_PATTERNS = "transport.vfs.RequiredFileNamePatterns"; public static final String TRANSPORT_FILE_CONTENT_TYPE = "transport.vfs.ContentType"; public static final String TRANSPORT_FILE_LOCKING = "transport.vfs.Locking"; public static final String UPDATE_LAST_MODIFIED = "transport.vfs.UpdateLastModified"; diff --git a/modules/commons/src/main/java/org/apache/synapse/commons/vfs/VFSUtils.java b/modules/commons/src/main/java/org/apache/synapse/commons/vfs/VFSUtils.java index 9025114ec6..879b7e3c16 100644 --- a/modules/commons/src/main/java/org/apache/synapse/commons/vfs/VFSUtils.java +++ b/modules/commons/src/main/java/org/apache/synapse/commons/vfs/VFSUtils.java @@ -45,9 +45,11 @@ import java.net.InetAddress; import java.net.UnknownHostException; import java.text.SimpleDateFormat; +import java.util.ArrayList; import java.util.Arrays; import java.util.Date; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Random; @@ -747,6 +749,100 @@ public static String resolveUriHost (String uri, StringBuilder strBuilder) return uri; } + /** + * Returns a list of reqiredFileNamePatterns, which contains real values instead of variable parts. + * + * @param fileBaseName base name of a given file, which will be used to replace variable parts of requiredNamePatterns, + * @param fileNamePattern pattern for allowed file names + * @param requiredFileNamePatterns patterns that has to be updated by replacing variable parts with actual values based on the given fileBaseName + * @return list of reqiredFileNamePattern containing real values instead of variable parts + */ + public static List buildPatternReplaced(String fileBaseName, String fileNamePattern, String requiredFileNamePatterns) { + List result = new ArrayList(); + if(requiredFileNamePatterns != null && !requiredFileNamePatterns.isEmpty()){ + //run the fileNamePattern to the child Filename to find out the regex groups needed + Pattern pattern = Pattern.compile(fileNamePattern); + Matcher matcher = pattern.matcher(fileBaseName); + matcher.find(); + + //there are required filename pattern. Put the comma separated fileNamePatterns into a list. $1[.]hl7, $1[.]pdf + String[] requiredPatternsList = requiredFileNamePatterns.split(","); + + //now each pattern must be found in the children + for(String requiredPattern : requiredPatternsList){ + //find the group pattern $1 + //replace the group pattern by the corresponding group of the input file + String requiredPatternReplaced = requiredPattern; + for(int i=1;i<=matcher.groupCount();i++){ + if(requiredPatternReplaced.contains("$"+i)){ + requiredPatternReplaced = requiredPatternReplaced.replace("$"+i, matcher.group(i)); + } + } + result.add(requiredPatternReplaced); + } + } + return result; + } + + /** + * Checks, if each given requiredPattern has at least one matching file from a given array of files. + * + * @param requiredPatternReplacedList list of file name patterns that will be under examination + * @param initialChildren array of {@link FileObject}s, in which the matching will be made + * @return false if each given requiredPattern has at least one matching file from a given array of files, true otherwise. + */ + public static boolean hasRequiredPatternsMissing(List requiredPatternReplacedList, FileObject[] initialChildren){ + if(requiredPatternReplacedList == null){ + return false; + } + + for(String requiredPatternReplaced : requiredPatternReplacedList){ + boolean matches = false; + //check if the pattern exists in the children list + for(FileObject childInList:initialChildren){ + String childInListFilename = childInList.getName().getBaseName(); + matches = childInListFilename.matches(requiredPatternReplaced); + if(matches){ + break; //file with this pattern found in children + } + } + //if pattern is not in the list of files (chrildren) - return true (there is a missing required filenamepattern); + if(matches == false){ + return true; + } + } + + //as a default there + return false; + } + + /** + * Returns the first file name pattern that matches to a given filename from an array of files. + * @param requiredPatternReplacedList the list of filename patterns + * @param initialChildren array of files, in which a matching filename will be searched for a pattern + * @param referenceName filename to wich the pattern is searched + * @return the first pattern that matches to a given filename from an array of files. + */ + public static String getMatchedPattern(List requiredPatternReplacedList, FileObject[] initialChildren, String referenceName){ + if(requiredPatternReplacedList != null) { + for (String requiredPatternReplaced : requiredPatternReplacedList) { + //check if the pattern exists in the children list + boolean matches = false; + for(FileObject childInList:initialChildren){ + String childInListFilename = childInList.getName().getBaseName(); + matches = childInListFilename.matches(requiredPatternReplaced); + if(matches){ + if(childInListFilename.equals(referenceName)) { + return requiredPatternReplaced; + } + } + } + } + } + + return null; + } + /** * Extracts the hostname from a URI. The scheme://userinfo@ part has diff --git a/modules/transports/core/vfs/src/main/java/org/apache/synapse/transport/vfs/PollTableEntry.java b/modules/transports/core/vfs/src/main/java/org/apache/synapse/transport/vfs/PollTableEntry.java index 90dd5917f6..64408bd5bf 100644 --- a/modules/transports/core/vfs/src/main/java/org/apache/synapse/transport/vfs/PollTableEntry.java +++ b/modules/transports/core/vfs/src/main/java/org/apache/synapse/transport/vfs/PollTableEntry.java @@ -57,6 +57,8 @@ public class PollTableEntry extends AbstractPollTableEntry { private String replyFileURI; /** file name pattern for a directory or compressed file entry */ private String fileNamePattern; + /** File name patterns and order of processing the matching files */ + private String requiredFileNamePatterns; /** Content-Type to use for the message */ private String contentType; @@ -179,6 +181,14 @@ public String getFileNamePattern() { return fileNamePattern; } + public String getRequiredFileNamePatterns() { + return requiredFileNamePatterns; + } + + public boolean hasRequiredFileNamePatterns() { + return requiredFileNamePatterns != null && !requiredFileNamePatterns.isEmpty(); + } + public String getContentType() { return contentType; } @@ -532,6 +542,9 @@ protected boolean loadConfigurationsFromService(ParameterInclude params) throws fileNamePattern = ParamUtils.getOptionalParam(params, VFSConstants.TRANSPORT_FILE_FILE_NAME_PATTERN); + requiredFileNamePatterns = ParamUtils.getOptionalParam(params, + VFSConstants.TRANSPORT_FILE_REQUIRED_FILE_NAME_PATTERNS); + contentType = ParamUtils.getRequiredParam(params, VFSConstants.TRANSPORT_FILE_CONTENT_TYPE); diff --git a/modules/transports/core/vfs/src/main/java/org/apache/synapse/transport/vfs/VFSTransportListener.java b/modules/transports/core/vfs/src/main/java/org/apache/synapse/transport/vfs/VFSTransportListener.java index 073fa959fe..aee86229f7 100644 --- a/modules/transports/core/vfs/src/main/java/org/apache/synapse/transport/vfs/VFSTransportListener.java +++ b/modules/transports/core/vfs/src/main/java/org/apache/synapse/transport/vfs/VFSTransportListener.java @@ -64,6 +64,7 @@ import java.io.IOException; import java.io.InputStream; import java.text.SimpleDateFormat; +import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; import java.util.Date; @@ -72,6 +73,8 @@ import java.util.Map; import java.util.Properties; import java.util.StringTokenizer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import javax.mail.internet.ContentType; import javax.mail.internet.ParseException; @@ -417,7 +420,13 @@ protected void scanFileOrDirectory(final PollTableEntry entry, String fileURI) { Arrays.sort(children, new FileLastmodifiedtimestampDesComparator()); } log.debug("End Sorting the files."); - } + } + + // sorting by RequiredFileNamePatterns (if specified) + if(entry.hasRequiredFileNamePatterns() && children.length > 1){ + Arrays.sort(children, new RequiredFileNamesComparator(entry.getFileNamePattern(), entry.getRequiredFileNamePatterns())); + } + for (FileObject child : children) { // Stop processing any further when put to maintenance mode (shutting down or restarting) // Stop processing when service get undeployed @@ -454,6 +463,24 @@ protected void scanFileOrDirectory(final PollTableEntry entry, String fileURI) { if(entry.getFileNamePattern()!=null && child.getName().getBaseName().matches(entry.getFileNamePattern())){ + + //Check if RequiredFileNamePatterns is specified + if(entry.hasRequiredFileNamePatterns()){ + //Commit a complete list of children + String childBaseName = child.getName().getBaseName(); + List requiredPatternReplacedList = + VFSUtils.buildPatternReplaced(childBaseName, entry.getFileNamePattern(), entry.getRequiredFileNamePatterns()); + + boolean hasPatternMissing = + VFSUtils.hasRequiredPatternsMissing(requiredPatternReplacedList, children); + + if(hasPatternMissing) { + //Do not consume this file + log.debug("Not all required files exist for transferring file ["+VFSUtils.maskURLPassword(child.getName().getBaseName()) +"]. Required patterns: "+entry.getRequiredFileNamePatterns()); + continue; + } + } + //child's file name matches the file name pattern //now we try to get the lock and process if (log.isDebugEnabled()) { @@ -1195,4 +1222,74 @@ public int compare(FileObject o1, FileObject o2) { return lDiff.intValue(); } } + + class RequiredFileNamesComparator implements Comparator { + String fileNamePattern = null; + String requiredFileNamePatterns = null; + + public RequiredFileNamesComparator(String fileNamePattern, String requiredFileNamePatterns){ + this.fileNamePattern = fileNamePattern; + this.requiredFileNamePatterns = requiredFileNamePatterns; + } + + @Override + public int compare(FileObject o1, FileObject o2) { + //compairs the string of both file names by ignoring the "variable" part + // ex. abc-file1.hl7 and def-file1.pdf with regex [a-z]$1[.]pdf,[a-z]$1[.]hl7 will compair only the "file1" string to sort the files + /** + * complex example: + Regex: [a-z]+[-]$1[-][1-9]+[-]$2[.]txt,[a-z]+[-]$2[-][1-9]+[-]$1[.]hl7 + INPUT files in folder + aaa-testFileA-12345-date1.txt + bbb-testFileA-2344-date2.txt + ccc-testFileB-45345-date1.txt + ddd-date1-32423-testFileA.hl7 + eee-date1-123-testFileB.hl7 + fff-date2-2342-testFileA.hl7 + + SORTED files list + aaa-testFileA-12345-date1.txt + ddd-date1-32423-testFileA.hl7 + bbb-testFileA-2344-date2.txt + fff-date2-2342-testFileA.hl7 + ccc-testFileB-45345-date1.txt + eee-date1-123-testFileB.hl7 + + */ + String fileName1 = removeFixedPatterns(o1, requiredFileNamePatterns); + String fileName2 = removeFixedPatterns(o2, requiredFileNamePatterns); + return fileName1.compareTo(fileName2); + } + + private String removeFixedPatterns(FileObject child, String requiredFileNamePatterns) { + //get base name of file + String baseName = child.getName().getBaseName(); + //separate the patterns + String[] patterns = requiredFileNamePatterns.split(","); + for(int i=0;i groups = new ArrayList<>(); + //extract the $1 to $9 (max 9 groups can be used for now) and replace them with regex named groups + for (int j = 0; j <= 9; j++) { + if ( fixedPattern.matches(".*[$]["+j+"].*")) { + fixedPattern = fixedPattern.replaceAll("[$]["+j+"]", "(?.*)"); + groups.add("grp"+j); + } + } + Pattern pattern = Pattern.compile(fixedPattern); + Matcher matcher = pattern.matcher(baseName); + //match the "fixed" expression with named groups + if(matcher.find()) { + String relevantname = ""; + //reconstruct the filename that matters with the regex groups ($1-$9) + for (String groupName: groups) { + relevantname+=matcher.group(groupName); + } + //add the index of pattern at the end of the name, used to ensure that the first pattern is the first file read + return relevantname+i; + } + } + return ""; + } + } }