Skip to content

Commit 3209040

Browse files
committed
added notes in solr schema; changed field names; updated routes and mapper
1 parent 5b3fa40 commit 3209040

File tree

5 files changed

+69
-24
lines changed

5 files changed

+69
-24
lines changed

solr/solr/conf/schema.xml

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@
368368
Longer patterns will be matched first. if equal size patterns
369369
both match, the first appearing in the schema will be used. -->
370370
<dynamicField name="*_i" type="sint" indexed="true" stored="true"/>
371-
<dynamicField name="*_s" type="string" indexed="true" stored="true" termVectors="true"/>
371+
<dynamicField name="*_s" type="string" indexed="true" stored="true" termVectors="true" multiValued="false"/>
372372
<dynamicField name="*_mvs" type="string" indexed="true" stored="true" multiValued="true"/>
373373
<dynamicField name="*_l" type="slong" indexed="true" stored="true"/>
374374
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
@@ -380,7 +380,8 @@
380380

381381
<dynamicField name="*_facet" type="string" indexed="true" stored="true" multiValued="true"/>
382382

383-
<dynamicField name="*_id" type="string" indexed="true" stored="true" termVectors="true"/>
383+
<!-- a single-valued string with a convenient name -->
384+
<dynamicField name="*_id" type="string" indexed="true" stored="true" termVectors="true" multiValued="false"/>
384385

385386
<dynamicField name="random*" type="random" />
386387

@@ -406,16 +407,22 @@
406407
<!-- copyField commands copy one field to another at the time a document
407408
is added to the index. It's used either to index the same field differently,
408409
or to add multiple fields to the same field for easier/faster searching. -->
409-
<copyField source="title" dest="text"/>
410+
411+
<!-- <copyField source="title" dest="text"/>
410412
<copyField source="title" dest="titleSort"/>
411413
<copyField source="title" dest="alphaTitleSort"/>
412414
413-
<copyField source="title" dest="spell"/>
415+
<copyField source="title" dest="spell"/> -->
414416

415-
<copyField source="*_t" dest="text"/>
417+
<!-- make these string based fields more searchable -->
418+
<copyField source="*_facet" dest="text"/>
416419
<copyField source="*_s" dest="text"/>
417420
<copyField source="*_mvs" dest="text"/>
418421

422+
<!-- <copyField source="*_t" dest="text"/>
423+
<copyField source="*_s" dest="text"/>
424+
<copyField source="*_mvs" dest="text"/> -->
425+
419426
<!-- Similarity is the scoring routine for each document vs. a query.
420427
A custom similarity may be specified here, but the default is fine
421428
for most applications. -->

web/app/controllers/swinburne_controller.rb

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,12 @@ def index
55
end
66

77
# Controller action for a single poem: stores a Swinburne lookup result in @response.
# This span is a rendered diff -- the "8-" line is the removed version (lookup by
# :poem_slug), the "8+" line is the added version (lookup by :poem_title_id).
def poem
8-
@response = Swinburne.find_by_poem_slug params[:poem_slug]
8+
@response = Swinburne.find_by_poem_title_id params[:poem_title_id]
99
end
1010

1111
# Controller action for one page of a poem.
#
# Sets @response to the Solr result for the page named by the :local_id route
# segment, and @relatives to the documents related to the first returned doc.
#
# The lookup must use params[:local_id]: Swinburne.find_by_local_id queries
# id:"swinburne-<local_id>", so passing the :poem_title_id slug never matches.
def poem_page
  @response = Swinburne.find_by_local_id params[:local_id]
  @relatives = Swinburne.find_relatives_of @response.docs.first
end
1615

1716
end

web/app/models/swinburne.rb

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,25 +7,25 @@ def self.find input_params
77
:q => input_params[:q],
88
:qt => "dismax",
99
:fq => %(collection_id:"swinburne"),
10-
'facet.field' => ['poem_title_facet'],
10+
'facet.field' => ['poem_title_s'],
1111
'facet' => true,
1212
'facet.mincount' => 1,
1313
:rows => 2_000_000_000,
1414
'hl' => 'true',
1515
'hl.fl' => 'xml_t',
1616
'hl.fragsize' => 100,
17-
:fl => 'id,score,poem_title_facet,local_id,page_s'
17+
:fl => 'id,score,poem_title_s,local_id,page_number_s'
1818
}.merge(input_params)
1919
connection.find search_params
2020
end
2121

22-
def self.find_by_poem_slug slug
22+
def self.find_by_poem_title_id title_id
2323
connection.find(
24-
:q=>%(poem_slug_s:"#{slug}"),
24+
:q=>%(poem_title_id:"#{poem_title_id}"),
2525
:fq => %(collection_id:"swinburne"),
2626
:rows => 2_000_000_000,
2727
'facet' => true,
28-
'facet.field' => ['variant_facet'],
28+
'facet.field' => ['variant_s'],
2929
'facet' => true,
3030
'facet.mincount' => 1
3131
)
@@ -35,4 +35,9 @@ def self.find_by_local_id local_id
3535
connection.find :q => %(id:"swinburne-#{local_id}"), :rows => 1
3636
end
3737

38+
# think "more like this"...
39+
# Think "more like this": finds the documents related to +solr_doc+.
#
# Filters on the document's collapse_id and on its poem title. The title is
# read from and matched against poem_title_s, the field name the mapper and
# Swinburne.find use for the poem title (poem_title_facet is no longer indexed).
#
# solr_doc - a hash-like Solr document responding to [] with :collapse_id
#            and :poem_title_s.
#
# Returns whatever Swinburne.find returns for those filters.
def self.find_relatives_of(solr_doc)
  filters = [
    %(collapse_id:"#{solr_doc[:collapse_id]}"),
    %(poem_title_s:"#{solr_doc[:poem_title_s]}")
  ]
  Swinburne.find :fq => filters, :rows => 999999
end
42+
3843
end

web/config/routes.rb

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,16 @@
33
map.root :controller => 'pages', :action => 'index'
44

55
map.swinburne '/swinburne', :controller => 'swinburne', :action => 'index'
6-
map.swinburne_poem '/swinburne/:poem_slug', :controller => 'swinburne', :action => 'poem'
7-
map.swinburne_poem_page '/swinburne/:poem_slug/:local_id', :controller => 'swinburne', :action => 'poem_page'
6+
map.swinburne_poem '/swinburne/:poem_title_id', :controller => 'swinburne', :action => 'poem'
7+
map.swinburne_poem_page '/swinburne/:poem_title_id/:local_id', :controller => 'swinburne', :action => 'poem_page'
8+
9+
# /swinburne
10+
# /swinburne/:variant_id
11+
# /swinburne/:variant_id/:poem_id
12+
# /swinburne/:variant_id/:poem_id/:page_number
13+
14+
# poem, all variants -- used for comparisons/text-diffs
15+
# /swinburne/:poem_id
16+
# /swinburne/:poem_id/:page_number
817

918
end

web/lib/swinburne_mapper.rb

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
require 'raven'
22
require 'nokogiri_fragmenter'
3+
4+
# string_ext brings in the to_slug method for strings
35
require 'string_ext'
46

57
class SwinburneMapper
@@ -17,12 +19,19 @@ def shared_fields
1719
@shared_fields ||= (
1820
fname = File.basename(xml_file)
1921
{
22+
# the string id of the entire swinburne collection... represents all poems, all variants, all pages etc..
2023
:collection_id => collection_id,
24+
# the file path where this info came from
2125
:file_s => xml_file.sub("#{Rails.root}/", ''),
26+
# the file-name
2227
:filename_s => fname,
23-
:variant_facet => variant_id,
28+
# the variant (better name?) which currently comes from the file name
29+
:variant_s => variant_id,
30+
# used to tie similar results together -- a source file's contents should be grouped together using this
2431
:collapse_id => "#{collection_id}-#{variant_id}",
32+
# the friendly title of this collection
2533
:collection_title_t => xml.at('//sourceDesc/citnstruct/title').text,
34+
2635
:author_t => xml.at('//citnstruct/author').text,
2736
:publisher_t => xml.at('//citnstruct/imprint/publisher').text,
2837
:printer_t => xml.at('//citnstruct/imprint/printer').text,
@@ -42,9 +51,11 @@ def map &block
4251
xml.search('//text').each do |text|
4352
# create a title for the poem
4453
poem_title = text['n'].nil? ? 'n/a' : text['n']
54+
55+
poem_id = poem_title.to_slug
56+
4557
puts "\n** processing new poem... #{poem_title}\n"
4658
# individual pages broken up by tei pb tags....
47-
4859
NokogiriFragmenter.fragment(text, 'pb') do |page_fragment|
4960

5061
pb = page_fragment.at('pb')
@@ -57,19 +68,33 @@ def map &block
5768

5869
# the page number label
5970
page_num = pb ? page_fragment.at('pb')['n'].scan(/[0-9]+/).first : 'n/a'
60-
# the actual page break solr document
71+
72+
# the TEI page-break solr document id
6173
local_id = "#{variant_id}-#{doc_index}"
74+
6275
yield shared_fields.merge({
76+
# absolute id, unique to every solr document
6377
:id => "#{collection_id}-#{local_id}",
78+
# a short, unique id, local to this collection's poem
6479
:local_id => local_id,
80+
# used for displaying/transforming the raw xml
6581
:xml_s => page_fragment.to_xml,
82+
83+
# raw xml within text field -- seems to work well for source highlighting?
84+
:xml_source_t => page_fragment.to_xml,
85+
86+
# the xml *text only*, used for highlighting and searching
6687
:xml_t => page_fragment.text,
88+
# push the xml text into the main "text" field for easy searching
6789
:text => page_fragment.text,
68-
:poem_title_t => poem_title,
69-
:poem_title_facet => poem_title,
70-
:poem_slug_s => poem_title.to_slug,
71-
:title => "#{poem_title}, Page #{page_num}",
72-
:page_s => page_num,
90+
# the poem title, stored as a facet
91+
:poem_title_s => poem_title,
92+
# the poem title, transformed into a url friendly value
93+
:poem_title_id => poem_id,
94+
# this solr document title
95+
:title => "#{poem_title}, p. #{page_num}",
96+
# the page number of this poem fragment
97+
:page_number_s => page_num,
7398
})
7499
doc_index += 1
75100
puts "..."

0 commit comments

Comments
 (0)