Skip to content

Commit

Permalink
Support elements that only contain textContent, remove commas from the end of addresses (#1440)
Browse files Browse the repository at this point in the history

* Support textNodes, remove commas from the end of addresses

* Restructure the textContent statement

* Make code more testable, add tests
  • Loading branch information
brianhall authored Jan 30, 2025
1 parent 0f9d508 commit a0325f6
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 5 deletions.
19 changes: 15 additions & 4 deletions injected/src/features/broker-protection/actions/extract.js
Original file line number Diff line number Diff line change
Expand Up @@ -160,15 +160,26 @@ export function createProfile(elementFactory, extractData) {
}

/**
* @param {{innerText: string}[]} elements
* @param {({ textContent: string } | { innerText: string })[]} elements
* @param {string} key
* @param {ExtractProfileProperty} extractField
* @return {string[]}
*/
function stringValuesFromElements(elements, key, extractField) {
export function stringValuesFromElements(elements, key, extractField) {
return elements.map((element) => {
// todo: should we use textContent here?
let elementValue = rules[key]?.(element) ?? element?.innerText ?? null;
let elementValue;

if ('innerText' in element) {
elementValue = rules[key]?.(element) ?? element?.innerText ?? null;

// When a selector uses the XPath text() node test, the matched nodes are text nodes that have no innerText property, so we fall back to textContent
} else if ('textContent' in element) {
elementValue = rules[key]?.(element) ?? element?.textContent ?? null;
}

if (!elementValue) {
return elementValue;
}

if (extractField?.afterText) {
elementValue = elementValue?.split(extractField.afterText)[1]?.trim() || elementValue;
Expand Down
3 changes: 3 additions & 0 deletions injected/src/features/broker-protection/extractors/address.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ function getCityStateCombos(inputList) {
// Strip out the zip code since we're only interested in city/state here.
item = item.replace(/,?\s*\d{5}(-\d{4})?/, '');

// Remove a trailing comma at the end of the string, which could confuse the city/state split.
item = item.replace(/,$/, '');

if (item.includes(',')) {
words = item.split(',').map((item) => item.trim());
} else {
Expand Down
17 changes: 16 additions & 1 deletion injected/unit-test/broker-protection-extract.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { aggregateFields, createProfile } from '../src/features/broker-protection/actions/extract.js';
import { aggregateFields, createProfile, stringValuesFromElements } from '../src/features/broker-protection/actions/extract.js';
import { cleanArray } from '../src/features/broker-protection/utils.js';

describe('create profiles from extracted data', () => {
Expand Down Expand Up @@ -360,4 +360,19 @@ describe('create profiles from extracted data', () => {

expect(actual.alternativeNames).toEqual(['Fred Firth', 'Jerry Doug', 'Marvin Smith', 'Roger Star']);
});

// When an element carries both properties, innerText takes precedence and textContent is ignored.
it('should extract innerText by default', () => {
    const elements = [{ innerText: 'John Smith, 39', textContent: 'Ignore me' }];
    const extractField = { selector: 'example' };

    const actual = stringValuesFromElements(elements, 'testKey', extractField);

    expect(actual).toEqual(['John Smith, 39']);
});

// Text nodes (e.g. those matched through an XPath text() node test) expose textContent
// but no innerText, so the extractor is expected to fall back to textContent.
it('should extract textContent if innerText is not present', () => {
    const element = {
        textContent: 'John Smith, 39',
    };
    expect(stringValuesFromElements([element], 'testKey', { selector: 'example' })).toEqual(['John Smith, 39']);
});
});

0 comments on commit a0325f6

Please sign in to comment.