Skip to content

DO NOT MERGE - lambda integration via lookup pattern #245

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env.test
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@ LINKRESOLVER_BASEURL=https://mit.primo.exlibrisgroup.com/discovery/openurl?insti
[email protected]
LIBKEY_KEY=FAKE_LIBKEY_KEY
LIBKEY_ID=FAKE_LIBKEY_ID
DETECTOR_LAMBDA_URL=http://localhost:3000
DETECTOR_LAMBDA_PATH=/foo
DETECTOR_LAMBDA_CHALLENGE_SECRET=secret_phrase
7 changes: 5 additions & 2 deletions app/models/detector/citation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class Detector
# hallmarks of being a citation.
# Phrases whose score is higher than the REQUIRED_SCORE value can be registered as a Detection.
class Citation
attr_reader :score, :subpatterns, :summary
attr_reader :features, :score, :subpatterns, :summary

# shared singleton methods
extend Detector::BulkChecker
Expand Down Expand Up @@ -67,10 +67,13 @@ def detection?
# @return Nothing intentional. Data is written to the Hashes `@features`, `@subpatterns`, and `@summary`,
# and to `@score`, during processing.
def initialize(phrase)
  @features = {}
  @subpatterns = {}
  @summary = {}

  pattern_checker(phrase)
  summarize(phrase)

  # Snapshot the per-pattern match counts BEFORE the empty entries are pruned below, so that `@features` carries an
  # entry for every pattern that was recorded, then fold in the summary values.
  match_counts = @subpatterns.deep_dup.transform_values { |matches| matches.length }
  @features = match_counts.merge(summary)

  # Entries holding an empty array are dropped from `@subpatterns` before the score is calculated.
  @subpatterns.reject! { |_type, matches| matches == [] }
  @score = calculate_score
end

Expand Down Expand Up @@ -141,7 +144,7 @@ def commas(phrase)
# @return hash
def pattern_checker(phrase)
  # Every pattern gets an entry in `@subpatterns`, whatever `scan` returns for it. The earlier conditional
  # assignment was removed: it was immediately overwritten by this unconditional one, and it made each pattern
  # be scanned three times instead of once.
  CITATION_PATTERNS.each_pair do |type, pattern|
    @subpatterns[type.to_sym] = scan(pattern, phrase)
  end
end

Expand Down
97 changes: 97 additions & 0 deletions app/models/lookup_citation.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# frozen_string_literal: true

# LookupCitation asks an external detector lambda whether a search phrase looks like a citation.
#
# The lambda's address, path, and shared secret are read from the environment (`DETECTOR_LAMBDA_URL`,
# `DETECTOR_LAMBDA_PATH`, and `DETECTOR_LAMBDA_CHALLENGE_SECRET`).
class LookupCitation
  # The info method is the way to return information about whether a given phrase is a citation. It consults the
  # external lambda service and returns either true or false. If anything goes wrong (missing or blank
  # configuration, a transport failure, a non-200 response, or a response body that is not JSON) the problem is
  # logged and nil is returned.
  #
  # @param phrase [String] the search phrase to classify
  # @return [Boolean, nil]
  def info(phrase)
    return unless expected_env?

    prediction = fetch(phrase)
    return if prediction == 'Error'

    prediction
  end

  private

  # @return [String, nil] the path on the lambda host that requests are posted to
  def lambda_path
    ENV.fetch('DETECTOR_LAMBDA_PATH', nil)
  end

  # @return [String, nil] the shared secret sent along with every payload
  def lambda_secret
    ENV.fetch('DETECTOR_LAMBDA_CHALLENGE_SECRET', nil)
  end

  # @return [String, nil] the base URL of the lambda
  def lambda_url
    ENV.fetch('DETECTOR_LAMBDA_URL', nil)
  end

  # define_lambda connects to the detector lambda.
  #
  # @return Faraday connection
  def define_lambda
    Faraday.new(
      url: lambda_url,
      params: {}
    )
  end

  # define_payload defines the Hash that will be sent to the lambda.
  #
  # @return Hash
  def define_payload(phrase)
    {
      action: 'predict',
      features: extract_features(phrase),
      challenge_secret: lambda_secret
    }
  end

  # expected_env? confirms that all three required environment variables are defined and not blank. Every setting
  # that fails the check is logged, so a variable that is set but empty no longer fails silently.
  #
  # @return Boolean
  def expected_env?
    settings = { 'URL' => lambda_url, 'path' => lambda_path, 'secret' => lambda_secret }
    missing = settings.select { |_label, value| value.blank? }

    missing.each_key { |label| Rails.logger.error("No lambda #{label} defined") }

    missing.empty?
  end

  # extract_features passes the search phrase through the citation detector, and massages the resulting features object
  # to correspond with what the lambda expects: `apa_volume_issue` is renamed to `apa`, `year_parens` is renamed to
  # `year`, and `characters` is dropped.
  #
  # @return Hash
  def extract_features(phrase)
    features = Detector::Citation.new(phrase).features
    features[:apa] = features.delete :apa_volume_issue
    features[:year] = features.delete :year_parens
    features.delete :characters
    features
  end

  # Fetch handles the communication with the detector lambda: defining the connection, building the payload, and any
  # error handling with the response. Transport failures raised by Faraday and a 200 response whose body is not JSON
  # are logged and reported as 'Error' rather than raised.
  #
  # @return Boolean or 'Error'
  def fetch(phrase)
    # Deliberately not called `lambda`: that name would shadow Kernel#lambda inside this method.
    connection = define_lambda
    payload = define_payload(phrase)

    response = connection.post(lambda_path, payload.to_json)

    return prediction_from(response.body) if response.status == 200

    log_failure(response.body)
    'Error'
  rescue Faraday::Error, JSON::ParserError => e
    Rails.logger.error("Detector lambda lookup failed: #{e.class}: #{e.message}")
    'Error'
  end

  # prediction_from reads the lambda's verdict out of a successful response body. The verdict is true only when the
  # body is a JSON object whose `response` value is the string 'true'.
  #
  # @return Boolean
  def prediction_from(body)
    parsed = JSON.parse(body.to_s)
    parsed.is_a?(Hash) && parsed['response'] == 'true'
  end

  # log_failure records an unsuccessful response. The raw body is always logged; when the body is a JSON object with
  # an `error` key, that message is logged as well. The body is a String here, so it has to be parsed before the key
  # can be read; indexing the String directly would only yield the substring 'error' or nil.
  #
  # @return nil
  def log_failure(body)
    Rails.logger.error(body)

    parsed = JSON.parse(body.to_s)
    Rails.logger.error(parsed['error']) if parsed.is_a?(Hash) && parsed.key?('error')
    nil
  rescue JSON::ParserError
    nil # the body was not JSON; the raw body logged above is all there is to report
  end
end
27 changes: 26 additions & 1 deletion test/models/detector/citation_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@

class Detector
class CitationTest < ActiveSupport::TestCase
test 'detector::citation exposes three instance variables' do
test 'detector::citation exposes four instance variables' do
t = terms('citation')
result = Detector::Citation.new(t.phrase)

assert_predicate result.features, :present?

assert_predicate result.score, :present?

assert_predicate result.summary, :present?
Expand Down Expand Up @@ -196,6 +198,29 @@ class CitationTest < ActiveSupport::TestCase
assert_operator 0, :<, result.score
end

test 'features instance method is a hash of integers' do
  # A phrase with no citation hallmarks still yields a features Hash whose values are all integers.
  features = Detector::Citation.new('simple search phrase').features

  assert_instance_of(Hash, features)

  assert(features.values.all?(&:integer?))
end

test 'features instance method includes all elements of citation detector regardless of search string' do
  # The number of feature entries must be the same for a one-word phrase and for a full citation.
  citation = 'Science Education and Cultural Diversity: Mapping the Field. Studies in Science Education, 24(1), 49–73.'
  simple_count = Detector::Citation.new('simple').features.length
  complex_count = Detector::Citation.new(citation).features.length

  assert_equal simple_count, complex_count
end

test 'features instance method should include all elements of citation patterns and summary thresholds' do
  # One feature is expected per entry in CITATION_PATTERNS plus one per entry in SUMMARY_THRESHOLDS.
  expected_length = %i[CITATION_PATTERNS SUMMARY_THRESHOLDS].sum do |constant|
    Detector::Citation.const_get(constant).length
  end
  actual_length = Detector::Citation.new('simple').features.length

  assert_equal expected_length, actual_length
end

test 'detection? convenience method returns true for obvious citations' do
result = Detector::Citation.new(terms('citation').phrase)

Expand Down
43 changes: 43 additions & 0 deletions test/models/lookup_citation_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# frozen_string_literal: true

require 'test_helper'

class LookupCitationTest < ActiveSupport::TestCase
  # Each lambda setting is mandatory: with any one of them unset, the lookup returns nil.
  %i[DETECTOR_LAMBDA_CHALLENGE_SECRET DETECTOR_LAMBDA_PATH DETECTOR_LAMBDA_URL].each do |variable|
    test "#{variable} is required" do
      ClimateControl.modify(variable => nil) do
        assert_nil(LookupCitation.new.info('ping'))
      end
    end
  end

  test 'lookup returns true when lambda running' do
    # These cassettes should be regenerated once the lambda is running in AWS. Until then, regenerating them
    # requires the lambda to be running on localhost.
    VCR.use_cassette('lambda running') do
      assert(LookupCitation.new.info('ping'))
    end
  end

  test 'lookup returns nil when challenge_secret is wrong' do
    ClimateControl.modify(DETECTOR_LAMBDA_CHALLENGE_SECRET: 'something wrong') do
      VCR.use_cassette('lambda with wrong secret') do
        assert_nil(LookupCitation.new.info('oops'))
      end
    end
  end
end
1 change: 1 addition & 0 deletions test/test_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
require 'rails/test_help'

VCR.configure do |config|
config.ignore_localhost = false
config.cassette_library_dir = 'test/vcr_cassettes'
config.hook_into :webmock

Expand Down
37 changes: 37 additions & 0 deletions test/vcr_cassettes/lambda_running.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 39 additions & 0 deletions test/vcr_cassettes/lambda_with_wrong_secret.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.