Skip to content

Commit

Permalink
Removed non-common tokens from GST
Browse files (browse the repository at this point in the history)
  • Loading branch information
TwoOfTwelve committed Jan 19, 2025
1 parent 88e2120 commit 256364a
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
10 changes: 5 additions & 5 deletions core/src/main/java/de/jplag/GreedyStringTiling.java
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ private JPlagComparison compareInternal(Submission leftSubmission, Submission ri
int[] leftValues = tokenValueListFromSubmission(leftSubmission, commonContexts);
int[] rightValues = tokenValueListFromSubmission(rightSubmission, commonContexts);

boolean[] leftMarked = calculateInitiallyMarked(leftSubmission);
boolean[] rightMarked = calculateInitiallyMarked(rightSubmission);
boolean[] leftMarked = calculateInitiallyMarked(leftSubmission, commonContexts);
boolean[] rightMarked = calculateInitiallyMarked(rightSubmission, commonContexts);

SubsequenceHashLookupTable leftLookupTable = subsequenceHashLookupTableForSubmission(leftSubmission, leftMarked, commonContexts);
SubsequenceHashLookupTable rightLookupTable = subsequenceHashLookupTableForSubmission(rightSubmission, rightMarked, commonContexts);
Expand Down Expand Up @@ -200,9 +200,9 @@ private void addMatchIfNotOverlapping(List<Match> matches, Match match) {
matches.add(match);
}

private boolean[] calculateInitiallyMarked(Submission submission) {
private boolean[] calculateInitiallyMarked(Submission submission, Set<Object> contexts) {
Set<Token> baseCodeTokens = baseCodeMarkings.get(submission);
List<Token> tokens = submission.getTokenList();
List<Token> tokens = submission.getTokenList(contexts);
boolean[] result = new boolean[tokens.size()];
for (int i = 0; i < result.length; i++) {
result[i] = tokens.get(i).getType().isExcludedFromMatching() || (baseCodeTokens != null && baseCodeTokens.contains(tokens.get(i)));
Expand All @@ -222,7 +222,7 @@ private SubsequenceHashLookupTable subsequenceHashLookupTableForSubmission(Submi
*/
private int[] tokenValueListFromSubmission(Submission submission, Set<Object> contexts) {
return cachedTokenValueLists.computeIfAbsent(submission, (key -> {
List<Token> tokens = key.getTokenList();
List<Token> tokens = key.getTokenList(contexts);
int[] tokenValueList = new int[tokens.size()];
for (int i = 0; i < tokens.size(); i++) {
TokenType type = tokens.get(i).getType().constrained(contexts);
Expand Down
4 changes: 4 additions & 0 deletions core/src/main/java/de/jplag/Submission.java
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,10 @@ public List<Token> getTokenList() {
return tokenList;
}

/**
 * Returns the subset of this submission's tokens that remain relevant for the given contexts.
 * <p>
 * A token is kept only if its type, after being constrained to {@code contexts}, still reports a non-empty attribute
 * collection. The relative order of the retained tokens matches the order of the underlying token list. The result is
 * computed anew on every call and, being produced by {@code Stream.toList()}, is unmodifiable.
 * @param contexts the contexts each token type is constrained to before its attributes are inspected
 * @return a new list containing the tokens whose constrained type has at least one attribute
 */
public List<Token> getTokenList(Set<Object> contexts) {
    return tokenList.stream().filter(token -> {
        // Constrain first, then drop tokens whose constrained type has nothing left to contribute.
        var constrainedType = token.getType().constrained(contexts);
        return !constrainedType.getAttributes().isEmpty();
    }).toList();
}

/**
* @return Whether a comparison between the submission and the base code is available.
*/
Expand Down

0 comments on commit 256364a

Please sign in to comment.