Skip to content

Commit

Permalink
Ignore blank thumbnails when deduplicating
Browse files Browse the repository at this point in the history
  • Loading branch information
nickgaya committed May 25, 2021
1 parent 2753fa8 commit 7d4843c
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
- Make it easier to specify extension path
- Add mechanism for tests to query info from content script
- Add test suite for page type
- Ignore blank thumbnails when deduplicating
- Minor fixes and improvements

## Version 1.3
Expand Down
24 changes: 21 additions & 3 deletions rededup.js
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,15 @@ function linkIndexComparator(link1, link2) {
return link1[indexSymbol] - link2[indexSymbol];
}

// Thumbnail hash values that most likely come from a blank or solid-color
// image. Thumbnails with one of these hashes are ignored, since matching on
// them tends to produce false positives. Entries are stored in the string
// form produced by bufToString.
const hashesToIgnore = new Set();
for (const hashBytes of [
    [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00],
    [0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00],
    [0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff],
    [0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff],
]) {
    hashesToIgnore.add(bufToString(new Uint8Array(hashBytes).buffer));
}

/**
* Stats object.
*
Expand Down Expand Up @@ -684,8 +693,11 @@ class DuplicateFinder {
* @param {Set<DSNode>} merged Set of merged nodes to update
*/
updateThumbMap(linkInfo, node, merged) {
const thumbMap = this.getThumbMap(linkInfo.domain);
const hashStr = bufToString(linkInfo.thumbnailHash);
if (hashesToIgnore.has(hashStr)) {
return;
}
const thumbMap = this.getThumbMap(linkInfo.domain);
if (thumbMap.has(hashStr)) {
// Exact hash match, merge with existing node.
this.mergeDsNodes(thumbMap.get(hashStr), node, merged);
Expand Down Expand Up @@ -901,7 +913,10 @@ async function main() {
} else {
debugInfo.pending += 1;
processBatch(links, pageType, true, dupFinder, settings)
.catch((error) => console.error(error))
.catch((error) => {
console.error(error);
debugInfo.lastBatchError = String(error);
})
.finally(() => { debugInfo.pending -= 1; });
}

Expand All @@ -918,7 +933,10 @@ async function main() {
debugInfo.numLinks += links.length;
debugInfo.pending += 1;
processBatch(links, pageType, false, dupFinder, settings)
.catch((error) => console.error(error))
.catch((error) => {
console.error(error);
debugInfo.lastBatchError = String(error);
})
.finally(() => { debugInfo.pending -= 1; });
}
}
Expand Down
35 changes: 29 additions & 6 deletions tests/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -445,13 +445,27 @@ for (const browserSpec of getBrowsers()) {
"Expect hash text to be 16 hex digits in "
+ "brackets");
});
});

/**
 * Load the extension's options page (options/index.html) in the driver,
 * using the extension URL that corresponds to the current browser.
 */
async function openSettingsPage() {
    const extensionOrigins = {
        firefox: `moz-extension://${extensionUuid}`,
        chrome: `chrome-extension://${chromeExtensionId}`,
    };
    const settingsUrl = extensionOrigins[browser] + '/options/index.html';
    await driver.get(settingsUrl);
}
suite('bugfixes', function() {
    suite('blank thumbnails', function() {
        // Pairs of [test label, id of the settings-page element to click].
        const hashFunctions = [
            ['dct', 'dctHash'],
            ['difference', 'diffHash'],
            ['wavelet', 'waveletHash'],
        ];

        for (const [label, elementId] of hashFunctions) {
            test(`${label} hash`, async function() {
                // Pick the hash function on the settings page.
                await openSettingsPage();
                const hashElt = await driver.findElement({id: elementId});
                await hashElt.click();

                // Posts with solid-color thumbnails tend to be false
                // positives, so they must not be coalesced: the expected
                // result lists the same posts as the input.
                await deduplicateTest(
                    ['t3_nk4opn', 't3_nk30iz'],
                    ['t3_nk4opn', 't3_nk30iz']);
            });
        }
    });
});

/** Wait for the extension to initialize. */
Expand All @@ -466,6 +480,8 @@ for (const browserSpec of getBrowsers()) {
const debugInfo = result.debugInfo;
assert.isOk(debugInfo);
assert.isUndefined(debugInfo.initError, "Error in main()");
assert.isUndefined(debugInfo.lastBatchError,
"Error in processBatch()");
return debugInfo.initialized
&& !debugInfo.pending
&& debugInfo;
Expand Down Expand Up @@ -529,6 +545,13 @@ for (const browserSpec of getBrowsers()) {
"Expect number of links to match request");
return [debugInfo, links];
}

/**
 * Load the extension's options page (options/index.html) in the driver,
 * using the extension URL that corresponds to the current browser.
 */
async function openSettingsPage() {
    const extensionOrigins = {
        firefox: `moz-extension://${extensionUuid}`,
        chrome: `chrome-extension://${chromeExtensionId}`,
    };
    const settingsUrl = extensionOrigins[browser] + '/options/index.html';
    await driver.get(settingsUrl);
}
});
}

Expand Down

0 comments on commit 7d4843c

Please sign in to comment.