Skip to content

Commit

Permalink
#373 align common name with namematching-service
Browse files Browse the repository at this point in the history
  • Loading branch information
Adam Collins committed Jan 29, 2024
1 parent 225269d commit 381e3bc
Show file tree
Hide file tree
Showing 8 changed files with 204 additions and 20 deletions.
2 changes: 1 addition & 1 deletion grails-app/conf/application.yml
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ skin:
orgNameLong: Atlas of Living Australia
useLegacyAuto: false
import:
sequence: collectory,taxonomy-all,vernacular,denormalise,layers,regions,localities,conservation-lists,wordpress,knowledgebase,biocollect,species-lists,favourites,weights,link-identifiers,images,occurrences,hidden-images,wiki-urls,suggest-index,sitemap,swap
sequence: collectory,taxonomy-all,vernacular,denormalise,layers,regions,localities,conservation-lists,wordpress,knowledgebase,biocollect,species-lists,favourites,weights,link-identifiers,images,occurrences,hidden-images,wiki-urls,suggest-index,sitemap,align-common-name,swap
sequenceDaily: conservation-lists,wordpress,knowledgebase,biocollect,species-lists,favourites,suggest-index,images,hidden-images,wiki-urls,sitemap,swap
sequenceWeekly: occurrences,layers,regions,localities,suggest-index,sitemap,swap
# enable daily and weekly tasks
Expand Down
7 changes: 7 additions & 0 deletions grails-app/controllers/au/org/ala/bie/ImportController.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,13 @@ class ImportController {

}

// Admin action: run the import service's common name alignment via execute(...)
// and render the resulting job's status as JSON.
// The 'online' request parameter (default false) is passed through to the service.
def alignCommonName() {
    def useOnline = params.getBoolean('online', false)
    def job = execute("alignCommonName", "admin.button.alignCommonName") {
        importService.alignCommonName(useOnline)
    }
    asJson(job.status())
}

// Documented in openapi.yml, not migrating to annotations because it is not intended for external use.
def buildFavourites() {
def online = params.getBoolean('online', false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,7 @@ class SearchController implements GrailsConfigurationAware {
in = QUERY,
description = "Comma separated list of fields to display facets for. Available fields listed http://bie.ala.org.au/ws/indexFields.",
schema = @Schema(implementation = String),
example = "datasetName,commonNameExact",
example = "datasetName,commonNameSingle",
required = false
)
],
Expand Down
1 change: 1 addition & 0 deletions grails-app/i18n/messages.properties
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ admin.button.buildlinks=Build Link Identifiers
admin.button.buildsuggestindex=Build Solr Suggestion Index
admin.button.buildweights=Build Search and Suggest Weights
admin.button.denormalise=Denormalise Taxa
admin.button.alignCommonName=Align Common Name
admin.button.importall=Import all information
admin.button.daily=Import daily information
admin.button.weekly=Import weekly information
Expand Down
177 changes: 160 additions & 17 deletions grails-app/services/au/org/ala/bie/ImportService.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,9 @@ class ImportService implements GrailsConfigurationAware {
case 'denormalize':
denormaliseTaxa(online)
break
case 'align-common-name':
alignCommonName(online)
break
case 'favourites':
buildFavourites(online)
break
Expand Down Expand Up @@ -1017,6 +1020,163 @@ class ImportService implements GrailsConfigurationAware {
}
}

/**
 * Align the preferred common name with the namematching service.
 *
 * Runs two passes, in this order:
 * <ol>
 * <li>{@link #alignCommonNameWithCommonRecords}: align with idxtype:COMMON records,
 *     using the same method as ala-namematching-service</li>
 * <li>{@link #alignCommonNameWithNamematchingService}: align with ala-namematching-service itself</li>
 * </ol>
 *
 * @param online Passed unchanged to both passes; selects the online or offline index
 */
def alignCommonName(online) {
    // Pass 1: common name records held in the index
    this.alignCommonNameWithCommonRecords(online)
    // Pass 2: names reported by the namematching service
    this.alignCommonNameWithNamematchingService(online)
}

/**
 * Set commonNameSingle and commonName on taxon-like documents from the vernacular
 * names returned by searchService.lookupVernacular.
 *
 * Pages through every document matching guid:* with idxtype TAXON, TAXONVARIANT or
 * IDENTIFIER using a Solr cursor sorted by id. For each document that has at least one
 * vernacular name, commonNameSingle is set to the first name returned and commonName
 * to the de-duplicated list of all names (in returned order). Updates are written
 * through indexService.indexBatch in groups of up to BUFFER_SIZE.
 *
 * Any exception is logged and ends the scan; it is not rethrown.
 *
 * @param online Passed to the index and search services to select the online or offline index
 */
def alignCommonNameWithCommonRecords(online) {
    int pageSize = BATCH_SIZE
    int processed = 0
    def typeQuery = "guid:* AND (idxtype:TAXON OR idxtype:TAXONVARIANT OR idxtype:IDENTIFIER)"

    log("Starting common name alignment with common records scan for ${online ? 'online' : 'offline'} index")
    try {
        def prevCursor = ""
        def cursor = CursorMarkParams.CURSOR_MARK_START

        // The scan is complete when Solr hands back the cursor it was given
        while (prevCursor != cursor) {
            def startTime = System.currentTimeMillis()
            SolrQuery query = new SolrQuery(typeQuery)
            query.setParam('cursorMark', cursor)
            query.setSort("id", SolrQuery.ORDER.asc)
            query.setRows(pageSize)
            def response = indexService.query(query, online)
            def docs = response.results
            int total = docs.numFound
            def buffer = []

            for (doc in docs) {
                def commonNames = searchService.lookupVernacular(doc.guid, online)

                if (commonNames) {
                    // LinkedHashSet removes duplicate names while keeping lookup order
                    def names = new LinkedHashSet(commonNames.collect { it.name })
                    buffer << [id: doc.id, commonNameSingle: [set: commonNames.get(0).name], commonName: [set: names]]
                }

                if (buffer.size() >= BUFFER_SIZE) {
                    indexService.indexBatch(buffer, online)
                    buffer = []
                }
                processed++
            }

            if (!buffer.isEmpty()) {
                indexService.indexBatch(buffer, online)
            }

            // Clamp to 1ms: a page that completes within the clock resolution would otherwise
            // divide by zero and abort the whole scan through the catch below
            long elapsed = Math.max(1L, System.currentTimeMillis() - startTime)
            def percentage = total ? Math.round(processed * 100 / total) : 100
            def speed = total ? Math.round((pageSize * 1000) / elapsed) : 0
            log("Processed ${processed} taxa (${percentage}%) speed ${speed} records per second")
            if (total > 0) {
                updateProgressBar(total, processed)
            }
            prevCursor = cursor
            cursor = response.nextCursorMark
        }
        log("Finished scan")
    } catch (Exception ex) {
        log.error("Unable to perform common name with common records alignment scan", ex)
        log("Error during scan: " + ex.getMessage())
    }
}

/**
 * Set commonNameSingle on taxon-like documents to the vernacular name reported by
 * the name service, wherever the two differ.
 *
 * Pages through every document matching guid:* with idxtype TAXON, TAXONVARIANT or
 * IDENTIFIER using a Solr cursor sorted by id. Document guids are sent to
 * nameService.searchByIds in groups of up to BATCH_SIZE; each returned result is matched
 * back to its document by taxonConceptID. Updates are written through
 * indexService.indexBatch in groups of up to BUFFER_SIZE.
 *
 * Any exception is logged and ends the scan; it is not rethrown.
 *
 * @param online Passed to the index service to select the online or offline index
 */
def alignCommonNameWithNamematchingService(online) {
    int pageSize = BATCH_SIZE
    int processed = 0
    def typeQuery = "guid:* AND (idxtype:TAXON OR idxtype:TAXONVARIANT OR idxtype:IDENTIFIER)"

    log("Starting common name alignment scan for ${online ? 'online' : 'offline'} index")
    try {
        def prevCursor = ""
        def cursor = CursorMarkParams.CURSOR_MARK_START

        // The scan is complete when Solr hands back the cursor it was given
        while (prevCursor != cursor) {
            def startTime = System.currentTimeMillis()
            SolrQuery query = new SolrQuery(typeQuery)
            query.setParam('cursorMark', cursor)
            query.setSort("id", SolrQuery.ORDER.asc)
            query.setRows(pageSize)
            def response = indexService.query(query, online)
            def docs = response.results
            int total = docs.numFound
            def buffer = []
            def guids = []
            // guid -> [id: document id, commonName: current commonNameSingle]
            def updates = [:]

            for (doc in docs) {
                def taxonID = doc.guid

                guids << taxonID
                updates[taxonID] = [id: doc.id, commonName: doc.commonNameSingle]

                if (guids.size() >= BATCH_SIZE) {
                    buffer.addAll(namematchingUpdatesFor(guids, updates))
                    guids = []
                    updates = [:]
                }

                if (buffer.size() >= BUFFER_SIZE) {
                    indexService.indexBatch(buffer, online)
                    buffer = []
                }
                processed++
            }

            // Look up whatever is left over from a partially filled batch
            if (!guids.isEmpty()) {
                buffer.addAll(namematchingUpdatesFor(guids, updates))
            }

            if (!buffer.isEmpty()) {
                indexService.indexBatch(buffer, online)
            }

            // Clamp to 1ms: a page that completes within the clock resolution would otherwise
            // divide by zero and abort the whole scan through the catch below
            long elapsed = Math.max(1L, System.currentTimeMillis() - startTime)
            def percentage = total ? Math.round(processed * 100 / total) : 100
            def speed = total ? Math.round((pageSize * 1000) / elapsed) : 0
            log("Processed ${processed} taxa (${percentage}%) speed ${speed} records per second")
            if (total > 0) {
                updateProgressBar(total, processed)
            }
            prevCursor = cursor
            cursor = response.nextCursorMark
        }
        log("Finished scan")
    } catch (Exception ex) {
        log.error("Unable to perform common name alignment scan", ex)
        log("Error during scan: " + ex.getMessage())
    }
}

/**
 * Build the commonNameSingle updates for one batch of guids.
 *
 * @param guids The guids to send to nameService.searchByIds
 * @param current Map of guid to [id, commonName] describing what the index holds now
 * @return One update per result whose vernacularName differs from the indexed name
 */
private List namematchingUpdatesFor(List guids, Map current) {
    def changes = []
    for (def result : nameService.searchByIds(guids)) {
        // Results whose taxonConceptID is not one of the guids in this batch are ignored
        def entry = current[result.taxonConceptID]
        if (entry && result.vernacularName != entry.commonName) {
            changes << [id: entry.id, commonNameSingle: [set: result.vernacularName]]
        }
    }
    return changes
}

/**
* Update TAXON SOLR doc with conservation status info
*
Expand Down Expand Up @@ -2759,23 +2919,6 @@ class ImportService implements GrailsConfigurationAware {
update["nameComplete"] = [set: nameComplete]
}
update['priority'] = [set: (int) Math.round(priority)]
def commonNames = searchService.lookupVernacular(guid, !online)
if (commonNames && !commonNames.isEmpty()) {
commonNames = commonNames.sort { n1, n2 ->
def s = n2.priority - n1.priority
if (s == 0 && commonLanguages) {
def s1 = commonLanguages.contains(n1.language) ? 1 : 0
def s2 = commonLanguages.contains(n2.language) ? 1 : 0
s = s1 - s2
}
s
}
def single = commonNames.find({ it.status != deprecatedStatus.status && (!commonLanguages || commonLanguages.contains(it.language))})?.name
def names = new LinkedHashSet(commonNames.collect { it.name })
update["commonName"] = [set: names]
update["commonNameExact"] = [set: names]
update["commonNameSingle"] = [set: single ]
}
def identifiers = searchService.lookupIdentifier(guid, !online)
if (identifiers) {
update["additionalIdentifiers"] = [set: identifiers.collect { it.guid }]
Expand Down
26 changes: 26 additions & 0 deletions grails-app/services/au/org/ala/bie/NameService.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package au.org.ala.bie

import au.org.ala.bie.util.Encoder
import grails.config.Config
import grails.converters.JSON
import grails.core.support.GrailsConfigurationAware
import groovy.json.JsonSlurper

Expand Down Expand Up @@ -79,4 +80,29 @@ class NameService implements GrailsConfigurationAware {
return null
return json.taxonConceptID
}

/**
 * Look up a batch of taxa in the name matching service by taxon concept identifier.
 *
 * POSTs a JSON array of <code>{"taxonConceptID": id}</code> objects, one per entry in
 * the supplied list, to <code>/api/searchAllByClassification</code> on the configured
 * service and returns the parsed JSON response.
 *
 * Failures are logged (with stack trace) and not rethrown.
 *
 * @param list The taxon concept identifiers to look up
 * @return The parsed JSON response body, or an empty list if the call fails
 */
def searchByIds(def list) {
    def endpoint = this.service + "/api/searchAllByClassification"
    HttpURLConnection conn = null
    try {
        def query = list.collect { [taxonConceptID: it] }
        def bytes = (query as JSON).toString().getBytes("UTF-8")

        conn = (HttpURLConnection) new URL(endpoint).openConnection()
        conn.setRequestMethod("POST")
        conn.setRequestProperty("Content-Type", "application/json")
        conn.setRequestProperty("Content-Length", String.valueOf(bytes.length))
        conn.setDoOutput(true)
        // withStream flushes and closes the request body even if the write fails
        conn.getOutputStream().withStream { it.write(bytes) }

        return JSON.parse(conn.getInputStream().text)
    } catch (err) {
        // Report the endpoint actually called and keep the stack trace
        log.error("Error calling " + endpoint + ", " + err.message, err)
        // Callers iterate the result, so an empty list means "no matches" on failure
        return []
    } finally {
        // Release the connection on both the success and failure paths
        conn?.disconnect()
    }
}
}
1 change: 1 addition & 0 deletions grails-app/services/au/org/ala/bie/SearchService.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,7 @@ class SearchService {
datasetURL: datasetURL
]
},
commonNameSingle: taxon.commonNameSingle,
commonNames: commonNames.collect { commonName ->
def datasetURL = getDataset(commonName.datasetID, datasetMap)?.guid
def datasetName = getDataset(commonName.datasetID, datasetMap)?.name
Expand Down
8 changes: 7 additions & 1 deletion grails-app/views/import/links.gsp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
<div>
<button id="denormalise-taxa" onclick="javascript:denormaliseTaxa()" class="btn btn-primary import-button"><g:message code="admin.button.denormalise"/></button>
</div>
<div>
<button id="align-common-name" onclick="javascript:alignCommonName()" class="btn btn-primary import-button"><g:message code="admin.button.alignCommonName"/></button>
</div>
<div>
<button id="build-link-identifiers" onclick="javascript:buildLinkIdentifiers()" class="btn btn-primary import-button"><g:message code="admin.button.buildlinks"/></button>
</div>
Expand Down Expand Up @@ -76,6 +79,9 @@
// Call the import controller's denormaliseTaxa action through loadInfo,
// passing the state of the #use-online checkbox as the 'online' parameter.
function denormaliseTaxa(){
    var useOnline = $('#use-online').is(':checked');
    var url = "${createLink(controller:'import', action:'denormaliseTaxa')}?online=" + useOnline;
    loadInfo(url);
}
// Call the import controller's alignCommonName action through loadInfo,
// passing the state of the #use-online checkbox as the 'online' parameter.
function alignCommonName(){
    var useOnline = $('#use-online').is(':checked');
    var url = "${createLink(controller:'import', action:'alignCommonName')}?online=" + useOnline;
    loadInfo(url);
}

function removeDanglingSynonyms(){
loadInfo("${createLink(controller:'import', action:'deleteDanglingSynonyms')}?online=" + $('#use-online').is(':checked'));
Expand All @@ -95,4 +101,4 @@
</asset:script>
</div>
</body>
</html>
</html>

0 comments on commit 381e3bc

Please sign in to comment.