diff --git a/grails-app/conf/application.yml b/grails-app/conf/application.yml index 67cd322..f58c1dc 100644 --- a/grails-app/conf/application.yml +++ b/grails-app/conf/application.yml @@ -288,7 +288,7 @@ skin: orgNameLong: Atlas of Living Australia useLegacyAuto: false import: - sequence: collectory,taxonomy-all,vernacular,denormalise,layers,regions,localities,conservation-lists,wordpress,knowledgebase,biocollect,species-lists,favourites,weights,link-identifiers,images,occurrences,hidden-images,wiki-urls,suggest-index,sitemap,swap + sequence: collectory,taxonomy-all,vernacular,denormalise,layers,regions,localities,conservation-lists,wordpress,knowledgebase,biocollect,species-lists,favourites,weights,link-identifiers,images,occurrences,hidden-images,wiki-urls,suggest-index,sitemap,align-common-name,swap sequenceDaily: conservation-lists,wordpress,knowledgebase,biocollect,species-lists,favourites,suggest-index,images,hidden-images,wiki-urls,sitemap,swap sequenceWeekly: occurrences,layers,regions,localities,suggest-index,sitemap,swap # enable daily and weekly tasks diff --git a/grails-app/controllers/au/org/ala/bie/ImportController.groovy b/grails-app/controllers/au/org/ala/bie/ImportController.groovy index 0c1f41f..eb32f50 100644 --- a/grails-app/controllers/au/org/ala/bie/ImportController.groovy +++ b/grails-app/controllers/au/org/ala/bie/ImportController.groovy @@ -339,6 +339,13 @@ class ImportController { } + def alignCommonName() { /* Controller action: reads the 'online' request parameter (default false), wraps importService.alignCommonName(online) in a job through execute(), and renders the job status as JSON. */ + def online = params.getBoolean('online', false) + def job = execute("alignCommonName", "admin.button.alignCommonName", { importService.alignCommonName(online) }) + asJson (job.status()) + + } + + // Documented in openapi.yml, not migrating to annotations because it is not intended for external use. 
def buildFavourites() { def online = params.getBoolean('online', false) diff --git a/grails-app/controllers/au/org/ala/bie/SearchController.groovy b/grails-app/controllers/au/org/ala/bie/SearchController.groovy index 7c327ef..bbff3cb 100644 --- a/grails-app/controllers/au/org/ala/bie/SearchController.groovy +++ b/grails-app/controllers/au/org/ala/bie/SearchController.groovy @@ -851,7 +851,7 @@ class SearchController implements GrailsConfigurationAware { in = QUERY, description = "Comma separated list of fields to display facets for. Available fields listed http://bie.ala.org.au/ws/indexFields.", schema = @Schema(implementation = String), - example = "datasetName,commonNameExact", + example = "datasetName,commonNameSingle", required = false ) ], diff --git a/grails-app/i18n/messages.properties b/grails-app/i18n/messages.properties index fd10fbf..1e0050c 100755 --- a/grails-app/i18n/messages.properties +++ b/grails-app/i18n/messages.properties @@ -93,6 +93,7 @@ admin.button.buildlinks=Build Link Identifiers admin.button.buildsuggestindex=Build Solr Suggestion Index admin.button.buildweights=Build Search and Suggest Weights admin.button.denormalise=Denormalise Taxa +admin.button.alignCommonName=Align Common Name admin.button.importall=Import all information admin.button.daily=Import daily information admin.button.weekly=Import weekly information diff --git a/grails-app/services/au/org/ala/bie/ImportService.groovy b/grails-app/services/au/org/ala/bie/ImportService.groovy index c8c9d2e..bdf2fc0 100644 --- a/grails-app/services/au/org/ala/bie/ImportService.groovy +++ b/grails-app/services/au/org/ala/bie/ImportService.groovy @@ -244,6 +244,9 @@ class ImportService implements GrailsConfigurationAware { case 'denormalize': denormaliseTaxa(online) break + case 'align-common-name': + alignCommonName(online) + break case 'favourites': buildFavourites(online) break @@ -1017,6 +1020,163 @@ class ImportService implements GrailsConfigurationAware { } } + /** + * Align the 
preferred common name with the namematching service
+     *
+     * 1. Align with idxtype:COMMON using the same method as ala-namematching-service
+     * 2. Align with ala-namematching-service
+     */
+    def alignCommonName(online) {
+        alignCommonNameWithCommonRecords(online) // pass 1: names returned by searchService.lookupVernacular
+        alignCommonNameWithNamematchingService(online) // pass 2: names returned by nameService.searchByIds
+    }
+
+    def alignCommonNameWithCommonRecords(online) { // sets commonNameSingle and commonName on each scanned doc that has vernacular names
+        int pageSize = BATCH_SIZE
+        int processed = 0
+        def typeQuery = "guid:* AND (idxtype:TAXON OR idxtype:TAXONVARIANT OR idxtype:IDENTIFIER)"
+        def prevCursor
+        def cursor
+
+        log("Starting common name alignment with common records scan for ${online ? 'online' : 'offline'} index")
+        try {
+            prevCursor = ""
+            cursor = CursorMarkParams.CURSOR_MARK_START
+            processed = 0
+
+            while (prevCursor != cursor) { // the scan ends when the returned cursor mark stops changing
+                def startTime = System.currentTimeMillis()
+                SolrQuery query = new SolrQuery(typeQuery)
+                query.setParam('cursorMark', cursor)
+                query.setSort("id", SolrQuery.ORDER.asc)
+                query.setRows(pageSize)
+                def response = indexService.query(query, online)
+                def docs = response.results
+                int total = docs.numFound
+                def buffer = []
+
+                docs.each { doc ->
+                    def taxonID = doc.guid
+
+                    def commonNames = searchService.lookupVernacular(taxonID, online)
+
+                    if (commonNames) { // docs without vernacular names are left untouched
+                        def names = new LinkedHashSet(commonNames.collect { it.name })
+                        def update = [id: doc.id, commonNameSingle: [set: commonNames.get(0).name], commonName: [set: names]] // first returned name becomes the single name
+                        buffer << update
+                    }
+
+                    if (buffer.size() >= BUFFER_SIZE) {
+                        indexService.indexBatch(buffer, online)
+                        buffer = []
+                    }
+                    processed++
+                }
+
+                if (!buffer.isEmpty())
+                    indexService.indexBatch(buffer, online)
+                def percentage = total ? Math.round(processed * 100 / total) : 100
+                def speed = total ? Math.round((docs.size() * 1000) / Math.max(1L, System.currentTimeMillis() - startTime)) : 0 // rate uses the docs actually returned; elapsed time is clamped to 1 ms so the division cannot hit zero
+                log("Processed ${processed} taxa (${percentage}%) speed ${speed} records per second")
+                if (total > 0) {
+                    updateProgressBar(total, processed)
+                }
+                prevCursor = cursor
+                cursor = response.nextCursorMark
+            }
+            log("Finished scan")
+        } catch (Exception ex) {
+            log.error("Unable to perform common name with common records alignment scan", ex)
+            log("Error during scan: " + ex.getMessage())
+        }
+    }
+
+    def alignCommonNameWithNamematchingService(online) { // overwrites commonNameSingle where nameService.searchByIds reports a different vernacularName
+        int pageSize = BATCH_SIZE
+        int processed = 0
+        def typeQuery = "guid:* AND (idxtype:TAXON OR idxtype:TAXONVARIANT OR idxtype:IDENTIFIER)"
+        def prevCursor
+        def cursor
+
+        log("Starting common name alignment scan for ${online ? 'online' : 'offline'} index")
+        try {
+            prevCursor = ""
+            cursor = CursorMarkParams.CURSOR_MARK_START
+            processed = 0
+
+            while (prevCursor != cursor) { // the scan ends when the returned cursor mark stops changing
+                def startTime = System.currentTimeMillis()
+                SolrQuery query = new SolrQuery(typeQuery)
+                query.setParam('cursorMark', cursor)
+                query.setSort("id", SolrQuery.ORDER.asc)
+                query.setRows(pageSize)
+                def response = indexService.query(query, online)
+                def docs = response.results
+                int total = docs.numFound
+                def buffer = []
+                def guids = [] // guids waiting to be sent to the name service
+                def updates = [:] // guid -> [id, commonName] for the docs in the pending batch
+
+                docs.each { doc ->
+                    def taxonID = doc.guid
+
+                    guids << taxonID
+                    updates[taxonID] = [id: doc.id, commonName: doc.commonNameSingle] // keep the currently indexed name for comparison
+
+                    if (guids.size() >= BATCH_SIZE) {
+                        def resultList = nameService.searchByIds(guids)
+
+                        for(def result : resultList) {
+                            def vernacularName = result.vernacularName
+
+                            if (updates[result.taxonConceptID] && vernacularName != updates[result.taxonConceptID].commonName) { // only write when the service's name differs from the indexed one
+                                def update = [id: updates[result.taxonConceptID].id, commonNameSingle: [set: vernacularName]]
+                                buffer << update
+                            }
+                        }
+                        guids = []
+                        updates = [:]
+                    }
+
+                    if (buffer.size() >= BUFFER_SIZE) {
+                        indexService.indexBatch(buffer, online)
+                        buffer = []
+                    }
+                    processed++
+                }
+                if (!guids.isEmpty()) { // send the guids left over after the page has been walked
+                    def resultList = nameService.searchByIds(guids)
+
+                    for(def result : resultList) {
+                        def vernacularName = result.vernacularName
+
+                        if (updates[result.taxonConceptID] && vernacularName != updates[result.taxonConceptID].commonName) {
+                            def update = [id: updates[result.taxonConceptID].id, commonNameSingle: [set: vernacularName]]
+                            buffer << update
+                        }
+                    }
+                    guids = []
+                    updates = [:]
+                }
+
+                if (!buffer.isEmpty())
+                    indexService.indexBatch(buffer, online)
+                def percentage = total ? Math.round(processed * 100 / total) : 100
+                def speed = total ? Math.round((docs.size() * 1000) / Math.max(1L, System.currentTimeMillis() - startTime)) : 0 // rate uses the docs actually returned; elapsed time is clamped to 1 ms so the division cannot hit zero
+                log("Processed ${processed} taxa (${percentage}%) speed ${speed} records per second")
+                if (total > 0) {
+                    updateProgressBar(total, processed)
+                }
+                prevCursor = cursor
+                cursor = response.nextCursorMark
+            }
+            log("Finished scan")
+        } catch (Exception ex) {
+            log.error("Unable to perform common name alignment scan", ex)
+            log("Error during scan: " + ex.getMessage())
+        }
+    }
+
     /**
      * Update TAXON SOLR doc with conservation status info
      *
@@ -2759,23 +2919,6 @@ class ImportService implements GrailsConfigurationAware {
                 update["nameComplete"] = [set: nameComplete]
             }
             update['priority'] = [set: (int) Math.round(priority)]
-            def commonNames = searchService.lookupVernacular(guid, !online)
-            if (commonNames && !commonNames.isEmpty()) {
-                commonNames = commonNames.sort { n1, n2 ->
-                    def s = n2.priority - n1.priority
-                    if (s == 0 && commonLanguages) {
-                        def s1 = commonLanguages.contains(n1.language) ? 1 : 0
-                        def s2 = commonLanguages.contains(n2.language) ? 
1 : 0
-                        s = s1 - s2
-                    }
-                    s
-                }
-                def single = commonNames.find({ it.status != deprecatedStatus.status && (!commonLanguages || commonLanguages.contains(it.language))})?.name
-                def names = new LinkedHashSet(commonNames.collect { it.name })
-                update["commonName"] = [set: names]
-                update["commonNameExact"] = [set: names]
-                update["commonNameSingle"] = [set: single ]
-            }
             def identifiers = searchService.lookupIdentifier(guid, !online)
             if (identifiers) {
                 update["additionalIdentifiers"] = [set: identifiers.collect { it.guid }]
diff --git a/grails-app/services/au/org/ala/bie/NameService.groovy b/grails-app/services/au/org/ala/bie/NameService.groovy
index a3bad20..6494f20 100644
--- a/grails-app/services/au/org/ala/bie/NameService.groovy
+++ b/grails-app/services/au/org/ala/bie/NameService.groovy
@@ -16,6 +16,7 @@ package au.org.ala.bie
 
 import au.org.ala.bie.util.Encoder
 import grails.config.Config
+import grails.converters.JSON
 import grails.core.support.GrailsConfigurationAware
 import groovy.json.JsonSlurper
 
@@ -79,4 +80,39 @@ class NameService implements GrailsConfigurationAware {
             return null
         return json.taxonConceptID
     }
+
+    /**
+     * Look up a batch of taxa in the name matching service by taxon concept ID.
+     *
+     * POSTs a JSON array of {taxonConceptID: id} objects to /api/searchAllByClassification.
+     *
+     * @param list The taxon concept identifiers to look up
+     * @return The parsed JSON response, or an empty list if the call fails
+     */
+    def searchByIds(def list) {
+        def endpoint = this.service + "/api/searchAllByClassification"
+        HttpURLConnection conn = null
+        try {
+            def query = list.collect { [taxonConceptID: it] }
+            def bytes = (query as JSON).toString().getBytes("UTF-8")
+
+            conn = (HttpURLConnection) new URL(endpoint).openConnection()
+            conn.setRequestMethod("POST")
+            conn.setRequestProperty("Content-Type", "application/json")
+            conn.setRequestProperty("Content-Length", String.valueOf(bytes.length))
+            conn.setDoOutput(true)
+            // withStream closes the request body stream even if the write fails
+            conn.getOutputStream().withStream { it.write(bytes) }
+
+            return JSON.parse(conn.getInputStream().text)
+        } catch (err) {
+            // Report the endpoint that was actually called and keep the stack trace
+            log.error("Error calling " + endpoint + ", " + err.message, err)
+            // An empty result lets batch callers carry on with the next batch
+            return []
+        } finally {
+            // Release the connection on both the success and the failure path
+            conn?.disconnect()
+        }
+    }
 }
diff --git a/grails-app/services/au/org/ala/bie/SearchService.groovy 
b/grails-app/services/au/org/ala/bie/SearchService.groovy index e6c92b3..b4a9ff7 100644 --- a/grails-app/services/au/org/ala/bie/SearchService.groovy +++ b/grails-app/services/au/org/ala/bie/SearchService.groovy @@ -805,6 +805,7 @@ class SearchService { datasetURL: datasetURL ] }, + commonNameSingle: taxon.commonNameSingle, commonNames: commonNames.collect { commonName -> def datasetURL = getDataset(commonName.datasetID, datasetMap)?.guid def datasetName = getDataset(commonName.datasetID, datasetMap)?.name diff --git a/grails-app/views/import/links.gsp b/grails-app/views/import/links.gsp index f06a31b..977f64c 100644 --- a/grails-app/views/import/links.gsp +++ b/grails-app/views/import/links.gsp @@ -36,6 +36,9 @@
+
+ +
@@ -76,6 +79,9 @@ function denormaliseTaxa(){ loadInfo("${createLink(controller:'import', action:'denormaliseTaxa')}?online=" + $('#use-online').is(':checked')); } + function alignCommonName(){ /* Calls loadInfo with the import controller's alignCommonName URL, passing the checked state of #use-online as the online query parameter. */ + loadInfo("${createLink(controller:'import', action:'alignCommonName')}?online=" + $('#use-online').is(':checked')); + } function removeDanglingSynonyms(){ loadInfo("${createLink(controller:'import', action:'deleteDanglingSynonyms')}?online=" + $('#use-online').is(':checked')); @@ -95,4 +101,4 @@ - \ No newline at end of file +