Skip to content

Commit

Permalink
Merge branch 'development' into dependabot/npm_and_yarn/node-polyfill-webpack-plugin-4.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
mfacar authored Jun 18, 2024
2 parents 188040c + f81cc39 commit 109f97c
Show file tree
Hide file tree
Showing 29 changed files with 1,640 additions and 207 deletions.
30 changes: 24 additions & 6 deletions app/api/services/informationextraction/InformationExtraction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import {
FileWithAggregation,
getFilesForTraining,
getFilesForSuggestions,
propertyTypeIsWithoutExtractedMetadata,
propertyTypeIsSelectOrMultiSelect,
} from 'api/services/informationextraction/getFiles';
import { Suggestions } from 'api/suggestions/suggestions';
Expand Down Expand Up @@ -95,6 +96,16 @@ type MaterialsData =
| TextSelectionMaterialsData
| ValuesSelectionMaterialsData;

/**
 * Loads the entities that can be offered as candidate options for a property.
 *
 * The query is always restricted to the default language reported by
 * `settings.getDefaultLanguage()`. When `property.content` holds a template id,
 * the query is additionally narrowed to that template; otherwise entities of
 * every template are returned.
 *
 * @param property - Property whose `content`, when set, is used as template id.
 * @returns Whatever `entities.getUnrestricted` yields when selecting only
 *   `title` and `sharedId`.
 */
async function fetchCandidates(property: PropertySchema) {
  const { key: language } = await settings.getDefaultLanguage();
  const query: { template?: ObjectId; language: string } = { language };

  // Treat a missing `content` like an empty one: `new ObjectId(undefined)`
  // generates a brand-new id, so the query would silently match no entity.
  if (property.content) {
    query.template = new ObjectId(property.content);
  }

  return entities.getUnrestricted(query, ['title', 'sharedId']);
}

class InformationExtraction {
static SERVICE_NAME = 'information_extraction';

Expand Down Expand Up @@ -142,9 +153,9 @@ class InformationExtraction {

let data: MaterialsData = { ..._data, language_iso };

const isSelect = propertyTypeIsSelectOrMultiSelect(propertyType);
const noExtractedData = propertyTypeIsWithoutExtractedMetadata(propertyType);

if (!isSelect && propertyLabeledData) {
if (!noExtractedData && propertyLabeledData) {
data = {
...data,
label_text: propertyValue || propertyLabeledData?.selection?.text,
Expand All @@ -155,7 +166,7 @@ class InformationExtraction {
};
}

if (isSelect) {
if (noExtractedData) {
if (!Array.isArray(propertyValue)) {
throw new Error('Property value should be an array');
}
Expand Down Expand Up @@ -184,7 +195,7 @@ class InformationExtraction {
);
const { propertyValue, propertyType } = file;

const missingData = propertyTypeIsSelectOrMultiSelect(propertyType)
const missingData = propertyTypeIsWithoutExtractedMetadata(propertyType)
? !propertyValue
: type === 'labeled_data' && !propertyLabeledData;

Expand Down Expand Up @@ -383,15 +394,22 @@ class InformationExtraction {

const params: TaskParameters = {
id: extractorId.toString(),
multi_value: property.type === 'multiselect',
multi_value: property.type === 'multiselect' || property.type === 'relationship',
};

if (property.type === 'select' || property.type === 'multiselect') {
if (propertyTypeIsSelectOrMultiSelect(property.type)) {
const thesauri = await dictionatiesModel.getById(property.content);

params.options =
thesauri?.values?.map(value => ({ label: value.label, id: value.id as string })) || [];
}
if (property.type === 'relationship') {
const candidates = await fetchCandidates(property);
params.options = candidates.map(candidate => ({
label: candidate.title || '',
id: candidate.sharedId || '',
}));
}

await this.taskManager.startTask({
task: 'create_model',
Expand Down
16 changes: 12 additions & 4 deletions app/api/services/informationextraction/getFiles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,16 @@ type FileEnforcedNotUndefined = {
};

/** The `select` and `multiselect` property types. */
const selectProperties = new Set<string>([propertyTypes.select, propertyTypes.multiselect]);

/** Every select-like type plus `relationship`. */
const propertiesWithoutExtractedMetadata = new Set<string>(
  Array.from(selectProperties).concat(propertyTypes.relationship)
);

/** Tells whether `type` is one of the select / multiselect property types. */
function propertyTypeIsSelectOrMultiSelect(type: string) {
  return selectProperties.has(type);
}

/** Tells whether `type` belongs to the set of types listed as being without extracted metadata. */
function propertyTypeIsWithoutExtractedMetadata(type: string) {
  return propertiesWithoutExtractedMetadata.has(type);
}

async function getFilesWithAggregations(files: (FileType & FileEnforcedNotUndefined)[]) {
const filesNames = files.filter(x => x.filename).map(x => x.filename);

Expand Down Expand Up @@ -98,7 +105,7 @@ async function fileQuery(
propertyType: string,
entitiesFromTrainingTemplatesIds: string[]
) {
const needsExtractedMetadata = !propertyTypeIsSelectOrMultiSelect(propertyType);
const needsExtractedMetadata = !propertyTypeIsWithoutExtractedMetadata(propertyType);
const query: {
type: string;
filename: { $exists: Boolean };
Expand All @@ -125,7 +132,7 @@ function entityForTrainingQuery(
const query: {
[key: string]: { $in?: ObjectIdSchema[]; $exists?: Boolean; $ne?: any[] };
} = { template: { $in: templates } };
if (propertyTypeIsSelectOrMultiSelect(propertyType)) {
if (propertyTypeIsWithoutExtractedMetadata(propertyType)) {
query[`metadata.${property}`] = { $exists: true, $ne: [] };
}
return query;
Expand Down Expand Up @@ -162,8 +169,8 @@ async function getFilesForTraining(templates: ObjectIdSchema[], property: string
return { ...file, propertyType };
}

if (propertyTypeIsSelectOrMultiSelect(propertyType)) {
const propertyValue = (entity.metadata[property] || []).map(({ value, label }) => ({
if (propertyTypeIsWithoutExtractedMetadata(propertyType)) {
const propertyValue = (entity.metadata?.[property] || []).map(({ value, label }) => ({
value: ensure<string>(value),
label: ensure<string>(label),
}));
Expand Down Expand Up @@ -223,5 +230,6 @@ export {
getFilesForSuggestions,
getSegmentedFilesIds,
propertyTypeIsSelectOrMultiSelect,
propertyTypeIsWithoutExtractedMetadata,
};
export type { FileWithAggregation };
10 changes: 9 additions & 1 deletion app/api/services/informationextraction/ixextractors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,16 @@ import {
createBlankSuggestionsForExtractor,
createBlankSuggestionsForPartialExtractor,
} from 'api/suggestions/blankSuggestions';
import { Subset } from 'shared/tsUtils';
import { PropertyTypeSchema } from 'shared/types/commonTypes';
import { IXExtractorModel as model } from './IXExtractorModel';

type AllowedPropertyTypes = 'title' | 'text' | 'numeric' | 'date' | 'select' | 'multiselect';
// Property types accepted by this module: the listed members of
// PropertyTypeSchema (checked through Subset) plus the literal 'title'.
// NOTE(review): 'title' is kept as a separate union member, presumably because
// it is not part of PropertyTypeSchema — confirm against shared/types/commonTypes.
type AllowedPropertyTypes =
| Subset<
PropertyTypeSchema,
'text' | 'numeric' | 'date' | 'select' | 'multiselect' | 'relationship'
>
| 'title';

const ALLOWED_PROPERTY_TYPES: AllowedPropertyTypes[] = [
'title',
Expand All @@ -19,6 +26,7 @@ const ALLOWED_PROPERTY_TYPES: AllowedPropertyTypes[] = [
'date',
'select',
'multiselect',
'relationship',
];

// Set built from ALLOWED_PROPERTY_TYPES and typed as Set<string>, presumably so
// arbitrary property type strings can be checked with `.has()` — verify usage below.
const allowedTypeSet = new Set<string>(ALLOWED_PROPERTY_TYPES);
Expand Down
Loading

0 comments on commit 109f97c

Please sign in to comment.