Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Officer, Agency combined Search #376

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions alembic.ini
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,7 @@ formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

# set tables to ignore
[alembic:exclude]
tables = search_view
4 changes: 4 additions & 0 deletions alembic/env.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# flake8:noqa
from logging.config import fileConfig

from alembic import context
Expand All @@ -6,6 +7,9 @@

from backend.api import create_app
from backend.database import db
from backend.database.models.officer import Officer
from backend.database.models.agency import Agency
from backend.database.models.search_view import SearchView


# There's no access to current_app here so we must create our own app.
Expand Down
111 changes: 111 additions & 0 deletions alembic/versions/9c0b77dbee14_fulltext.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""fulltext

Revision ID: 9c0b77dbee14
Revises:
Create Date: 2024-04-21 02:53:30.947416

"""
from alembic import op
import sqlalchemy as sa
# import backend.database.models.types


# revision identifiers, used by Alembic.
revision = '9c0b77dbee14'
# None means this revision has no parent migration.
down_revision = None
branch_labels = None
depends_on = None
# (table, column) pairs. upgrade() creates one statement-level trigger per
# pair, named tsv_<table>_<column>_trigger, and downgrade() drops them.
TRIGGER_TUPLES = [('officer', 'first_name'),
('officer', 'last_name'),
('officer', 'middle_name'),
('agency', 'name')]

# tsvector columns of search_view; upgrade() builds one GIN index on each.
index_set = [
"tsv_officer_first_name",
"tsv_officer_last_name",
"tsv_officer_middle_name",
"tsv_agency_name",
]


def upgrade():
    """Create the ``search_view`` materialized view, its indexes and triggers.

    Steps, in order:

    1. Create ``search_view`` from a FULL OUTER JOIN of ``officer`` and
       ``agency``, with a precomputed tsvector next to every searchable
       name column.
    2. Add a unique index on ``id`` (PostgreSQL requires a unique index for
       ``REFRESH MATERIALIZED VIEW CONCURRENTLY``).
    3. Add a GIN index on every column listed in ``index_set``.
    4. Create the ``trig_refresh_search_view()`` function and one
       statement-level trigger per entry of ``TRIGGER_TUPLES`` so the view
       is refreshed when the source tables change.
    """
    # grab a connection to the database
    conn = op.get_bind()

    # The tsvectors are built with the explicit 'english' configuration so
    # they are tokenised the same way as the 'english' queries issued by the
    # search route, regardless of the server's default_text_search_config.
    #
    # NOTE(review): the join condition compares an officer's first name with
    # an agency name, so rows presumably almost never pair up and the view
    # acts like a union of officer-only and agency-only rows -- confirm this
    # is the intent.
    # NOTE(review): ROW_NUMBER() OVER () has no ORDER BY, so ids are
    # presumably not stable across refreshes -- confirm nothing stores them.
    conn.execute(sa.sql.text('''
        CREATE MATERIALIZED VIEW search_view AS
        (SELECT
            ROW_NUMBER() OVER () AS id,
            officer.id AS officer_id,
            officer.first_name AS officer_first_name,
            to_tsvector('english', officer.first_name)
                AS tsv_officer_first_name,
            officer.middle_name AS officer_middle_name,
            to_tsvector('english', officer.middle_name)
                AS tsv_officer_middle_name,
            officer.last_name AS officer_last_name,
            to_tsvector('english', officer.last_name)
                AS tsv_officer_last_name,
            officer.race AS officer_race,
            officer.ethnicity AS officer_ethnicity,
            officer.gender AS officer_gender,
            officer.date_of_birth AS officer_date_of_birth,
            agency.id AS agency_id,
            agency.name AS agency_name,
            to_tsvector('english', agency.name) AS tsv_agency_name,
            agency.website_url AS agency_website_url,
            agency.hq_address AS agency_hq_address,
            agency.hq_city AS agency_hq_city,
            agency.hq_zip AS agency_hq_zip,
            agency.jurisdiction AS agency_jurisdiction
        FROM officer
        FULL OUTER JOIN agency ON
            officer.first_name = agency.name
        )
    '''))

    # create unique index on ids (needed by REFRESH ... CONCURRENTLY)
    op.create_index(
        op.f('idx_search_view_id'),
        'search_view',
        ['id'],
        unique=True,
    )

    # create remaining indices on the tsv columns; index names are kept
    # exactly as originally generated (idx_tsv_<column name>)
    for index in index_set:
        op.create_index(
            op.f('idx_tsv_{}'.format(index)),
            'search_view',
            [index],
            postgresql_using='gin',
        )

    # trigger function that refreshes the materialized view
    conn.execute(sa.sql.text('''
        CREATE OR REPLACE FUNCTION trig_refresh_search_view() RETURNS trigger AS
        $$
        BEGIN
            REFRESH MATERIALIZED VIEW CONCURRENTLY search_view;
            RETURN NULL;
        END;
        $$
        LANGUAGE plpgsql ;
    '''))

    # One trigger per (table, column); each is dropped first so re-running
    # the statements does not fail on an already existing trigger.
    # NOTE(review): every trigger also fires on INSERT/DELETE/TRUNCATE, so a
    # single write to ``officer`` refreshes the view once per officer
    # trigger -- consider one trigger per table.
    for table, column in TRIGGER_TUPLES:
        conn.execute(sa.sql.text('''
            DROP TRIGGER IF EXISTS tsv_{table}_{column}_trigger ON {table}
        '''.format(table=table, column=column)))
        conn.execute(sa.sql.text('''
            CREATE TRIGGER tsv_{table}_{column}_trigger AFTER
            TRUNCATE OR INSERT OR DELETE OR UPDATE OF {column}
            ON {table} FOR EACH STATEMENT
            EXECUTE PROCEDURE trig_refresh_search_view()
        '''.format(table=table, column=column)))


def downgrade():
    """Remove everything ``upgrade()`` created.

    Objects are dropped in dependency order: the triggers first (so no
    write can invoke a refresh of a view that is about to disappear), then
    the trigger function they call, and finally the materialized view,
    which takes its indexes with it.
    """
    # grab a connection to the database
    conn = op.get_bind()

    # drop the refresh triggers from the source tables
    for table, column in TRIGGER_TUPLES:
        conn.execute(sa.sql.text('''
            DROP TRIGGER IF EXISTS tsv_{table}_{column}_trigger ON {table}
        '''.format(table=table, column=column)))

    # drop the trigger function, which the original downgrade left behind
    conn.execute(sa.sql.text('''
        DROP FUNCTION IF EXISTS trig_refresh_search_view()
    '''))

    # drop the materialized view
    conn.execute(sa.sql.text('''
        DROP MATERIALIZED VIEW search_view
    '''))

1 change: 1 addition & 0 deletions backend/database/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@
from .models.user import *
from .models.victim import *
from .models.partner import *
from .models.search_view import *
37 changes: 37 additions & 0 deletions backend/database/models/search_view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from enum import Enum
from sqlalchemy.dialects.postgresql import TSVECTOR
from ..core import db


class Jurisdiction(str, Enum):
    """Jurisdiction levels stored in ``SearchView.agency_jurisdiction``.

    The ``str`` mix-in makes every member an actual string equal to its
    value.
    """
    FEDERAL = "FEDERAL"
    STATE = "STATE"
    COUNTY = "COUNTY"
    MUNICIPAL = "MUNICIPAL"
    PRIVATE = "PRIVATE"
    OTHER = "OTHER"


class SearchView(db.Model):
    """ORM mapping of the ``search_view`` materialized view.

    Each searchable name column has a matching ``tsv_*`` tsvector column
    used for full-text matching and ranking. The columns mirror the SELECT
    list of the view, so both must be changed together.
    """
    __tablename__ = 'search_view'
    # Marks the mapping as a view rather than a real table.
    # NOTE(review): presumably read by the alembic environment to skip
    # autogeneration for this model -- confirm where ``is_view`` is used.
    __table_args__ = {'info': dict(is_view=True)}

    # NOTE(review): the view fills ``id`` with ROW_NUMBER(), which is a
    # bigint in PostgreSQL -- confirm db.Text is the intended type here.
    id = db.Column(db.Text, primary_key=True, index=True)

    # officer columns
    officer_id = db.Column(db.Integer)
    officer_first_name = db.Column(db.Text)
    tsv_officer_first_name = db.Column(TSVECTOR)
    officer_middle_name = db.Column(db.Text)
    tsv_officer_middle_name = db.Column(TSVECTOR)
    officer_last_name = db.Column(db.Text)
    tsv_officer_last_name = db.Column(TSVECTOR)
    officer_race = db.Column(db.Text)
    officer_ethnicity = db.Column(db.Text)
    officer_gender = db.Column(db.Text)
    officer_date_of_birth = db.Column(db.Date)

    # agency columns
    agency_id = db.Column(db.Integer)
    agency_name = db.Column(db.Text)
    # Present in the view and used by the search query; it was missing from
    # the mapping, so ``SearchView.tsv_agency_name`` raised AttributeError.
    tsv_agency_name = db.Column(TSVECTOR)
    agency_website_url = db.Column(db.Text)
    agency_hq_address = db.Column(db.Text)
    agency_hq_city = db.Column(db.Text)
    agency_hq_zip = db.Column(db.Text)
    agency_jurisdiction = db.Column(db.Enum(Jurisdiction))
2 changes: 1 addition & 1 deletion backend/routes/agencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
from backend.mixpanel.mix import track_to_mp
from backend.database.models.user import UserRole
from backend.database.models.officer import Officer
from flask_jwt_extended.view_decorators import jwt_required
from backend.database.models.employment import (
merge_employment_records,
Employment
)
from flask import Blueprint, abort, request
from flask_jwt_extended.view_decorators import jwt_required
from sqlalchemy.exc import DataError
from pydantic import BaseModel

Expand Down
101 changes: 101 additions & 0 deletions backend/routes/officer_agency_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from sqlalchemy.sql.functions import GenericFunction

from backend.database.models.user import UserRole
from ..database import SearchView, db
from flask import Blueprint, jsonify, request
from flask_jwt_extended.view_decorators import jwt_required
from backend.auth.jwt import min_role_required


class TSRank(GenericFunction):
    """SQL function ``ts_rank`` registered under the ``full_text`` package.

    search() invokes it as ``db.func.full_text.ts_rank(...)`` to score how
    well a row's tsvector matches the query.
    """
    package = 'full_text'
    name = 'ts_rank'
    # Opts this construct into SQLAlchemy's compiled-statement cache.
    inherit_cache = True


# Blueprint holding the combined officer/agency search route; every route
# registered on it lives under /api/v1/search.
bp = Blueprint("search_route", __name__, url_prefix="/api/v1/search")

# Page size used by search() when the request has no ``per_page`` argument.
DEFAULT_PER_PAGE = 5


def _serialize_search_row(row):
    """Turn one search result row into the dict returned to the client.

    Rows without an agency yield the officer name fields, rows without an
    officer yield the agency fields, and rows carrying both yield both sets
    of keys.
    """
    officer_fields = {
        "first_name": row.officer_first_name,
        "middle_name": row.officer_middle_name,
        "last_name": row.officer_last_name,
    }
    agency_fields = {
        "agency_name": row.agency_name,
        "agency_hq_city": row.agency_hq_city,
        "agency_jurisdiction": row.agency_jurisdiction,
    }
    if row.agency_name is None:
        return officer_fields
    if row.officer_first_name is None:
        return agency_fields
    return {**officer_fields, **agency_fields}


@bp.route("/", methods=["POST"])
# NOTE(review): flask-jwt-extended 4.x requires this decorator to be called,
# i.e. ``@jwt_required()`` -- confirm against the pinned version.
@jwt_required
@min_role_required(UserRole.PUBLIC)
def search():
    """Full-text search across officers and agencies.

    Query-string arguments:
        search_term: text to search for (required).
        page: 1-based page number, defaults to 1.
        per_page: page size, defaults to ``DEFAULT_PER_PAGE``.

    Returns:
        A JSON object with ``page``, ``per_page``, ``total_results`` and
        ``results`` (the requested page of matches, best rank first).
        Responds with 400 when ``search_term`` is missing or when ``page``
        or ``per_page`` is smaller than 1.
    """
    # ``type=int`` falls back to the default instead of raising when the
    # argument is not a valid integer.
    page = request.args.get('page', 1, type=int)
    per_page = request.args.get('per_page', DEFAULT_PER_PAGE, type=int)
    search_term = (request.args.get('search_term') or '').strip()

    if not search_term:
        return jsonify({"message": "search_term is required"}), 400
    if page < 1 or per_page < 1:
        return jsonify(
            {"message": "page and per_page must be positive integers"}
        ), 400

    tsv_columns = (
        SearchView.tsv_officer_first_name,
        SearchView.tsv_officer_middle_name,
        SearchView.tsv_officer_last_name,
        SearchView.tsv_agency_name,
    )
    output_columns = (
        SearchView.officer_first_name,
        SearchView.officer_middle_name,
        SearchView.officer_last_name,
        SearchView.agency_name,
        SearchView.agency_hq_city,
        SearchView.agency_jurisdiction,
    )

    # Concatenate all tsvectors (each weighted 'A', NULLs treated as empty)
    # into one document to rank against. ``is None`` is used on purpose:
    # SQL expressions must not be tested for truthiness.
    document = None
    for column in tsv_columns:
        weighted = db.func.setweight(db.func.coalesce(column, ''), 'A')
        document = weighted if document is None else document.concat(weighted)

    # The text-search configuration is the first positional argument of
    # to_tsquery; it is not a keyword argument of ``func``.
    # NOTE(review): to_tsquery expects tsquery syntax, so free text with
    # spaces is rejected by PostgreSQL -- consider plainto_tsquery.
    ts_query = db.func.to_tsquery('english', search_term)
    rank = db.func.max(
        db.func.full_text.ts_rank(document, ts_query)
    ).label('rank')

    matches_any_column = db.or_(*(
        column.match(search_term, postgresql_regconfig='english')
        for column in tsv_columns
    ))

    rows = (
        db.session.query(db.distinct(SearchView.id), *output_columns, rank)
        .filter(matches_any_column)
        .group_by(SearchView.id, *output_columns)
        .order_by(db.text('rank DESC'))
        .all()
    )

    # Only the rows of the requested page are serialized.
    start_index = (page - 1) * per_page
    page_rows = rows[start_index:start_index + per_page]
    response = {
        "page": page,
        "per_page": per_page,
        "total_results": len(rows),
        "results": [_serialize_search_row(row) for row in page_rows],
    }
    try:
        return jsonify(response)
    except Exception as e:
        # Flask expects (body, status), not (status, body).
        return jsonify({"message": str(e)}), 500
Loading