Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preserve Unitdate Order #1499

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 13 additions & 25 deletions lib/arclight/normalized_date.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,24 @@ module Arclight
# e.g., "1990-2000, bulk 1990-1999"
# @see http://www2.archivists.org/standards/DACS/part_I/chapter_2/4_date
class NormalizedDate
# @param [String | Array<String>] `inclusive` from the `unitdate`
# @param [Array<String>] `bulk` from the `unitdate`
# @param [Array<String>] `other` from the `unitdate` when type is not specified
def initialize(inclusive, bulk = [], other = [])
@inclusive = (inclusive || []).map do |inclusive_text|
if inclusive_text.is_a? Array # of YYYY-YYYY for ranges
# NOTE: This code is not routable AFAICT in actual indexing.
# We pass arrays of strings (or xml nodes) here, and never a multidimensional array
year_range(inclusive_text)
elsif inclusive_text.present?
inclusive_text.strip
# @param unitdates [Array<String>, nil] the unitdate strings, in document order
# @param unitdate_labels [Array<String>, nil] the type label for each unitdate, at the same index ("" when a unitdate has no type)
def initialize(unitdates, unitdate_labels)
@date_accumulator = []
if unitdates.present?
unitdates.each_with_index do |unitdate, i|
if unitdate_labels[i].downcase.match?('bulk')
@date_accumulator << "#{unitdate_labels[i]} #{unitdate}"
else
@date_accumulator << unitdate
end
end
end&.join(', ')

@bulk = Array.wrap(bulk).compact.map(&:strip).join(', ')
@other = Array.wrap(other).compact.map(&:strip).join(', ')
end
end

# @return [String] the normalized title/date
def to_s
normalize
@date_accumulator.join(', ')
end

private
Expand All @@ -36,14 +33,5 @@ def to_s
# Hands a list of date strings to YearRange and returns its string form.
# Unless the list already contains '/', every '-' in each entry is first
# replaced with '/'.
#
# NOTE(review): if date_array is an Array, `include?('/')` is only true when
# an element is exactly "/" — confirm whether a substring check was intended.
#
# @param date_array [Array<String>] date strings (presumably YYYY or YYYY-YYYY — confirm)
# @return [String] the result of YearRange#to_s
def year_range(date_array)
  slash_delimited =
    if date_array.include?('/')
      date_array
    else
      date_array.map { |date_text| date_text.tr('-', '/') }
    end

  YearRange.new(slash_delimited).to_s
end

# Assembles the display date from the inclusive, other and bulk values, in
# that order. Blank values are left out, the bulk value is prefixed with
# "bulk ", each piece is stripped, and the pieces are joined with ", ".
#
# @see http://www2.archivists.org/standards/DACS/part_I/chapter_2/4_date for rules
# @return [String] the joined date string (empty when nothing is present)
def normalize
  pieces = [inclusive, other].select(&:present?)
  pieces << "bulk #{bulk}" if bulk.present?
  pieces.map(&:strip).join(', ')
end
end
end
18 changes: 12 additions & 6 deletions lib/arclight/traject/ead2_component_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,21 @@
to_field 'title_ssm', extract_xpath('./did/unittitle')
to_field 'title_tesim', extract_xpath('./did/unittitle')

to_field 'unitdate_bulk_ssim', extract_xpath('./did/unitdate[@type="bulk"]')
to_field 'unitdate_inclusive_ssm', extract_xpath('./did/unitdate[@type="inclusive"]')
to_field 'unitdate_other_ssim', extract_xpath('./did/unitdate[not(@type)]')
to_field 'unitdates_ssm', extract_xpath('./did/unitdate')
# Records the `type` attribute of each unitdate in this record's own <did>,
# in document order. An empty string is pushed for a unitdate without a
# `type`, so there is exactly one label per unitdate.
#
# The path is `./did/unitdate`, the same one used for `unitdates_ssm`, so the
# two fields stay index-aligned. The descendant form `.//did/unitdate` would
# also match unitdates inside any nested <did>, giving more labels than dates.
to_field 'unitdates_labels_ssm' do |record, accumulator|
  record.xpath('./did/unitdate').each do |unitdate|
    type_attribute = unitdate.attribute('type')
    accumulator << (type_attribute ? type_attribute.value : '')
  end
end

to_field 'normalized_date_ssm' do |_record, accumulator, context|
accumulator << settings['date_normalizer'].constantize.new(
context.output_hash['unitdate_inclusive_ssm'],
context.output_hash['unitdate_bulk_ssim'],
context.output_hash['unitdate_other_ssim']
context.output_hash['unitdates_ssm'],
context.output_hash['unitdates_labels_ssm']
).to_s
end

Expand Down
19 changes: 12 additions & 7 deletions lib/arclight/traject/ead2_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,16 @@
to_field 'title_tesim', extract_xpath('/ead/archdesc/did/unittitle')
to_field 'ead_ssi', extract_xpath('/ead/eadheader/eadid')

to_field 'unitdate_ssm', extract_xpath('/ead/archdesc/did/unitdate')
to_field 'unitdate_bulk_ssim', extract_xpath('/ead/archdesc/did/unitdate[@type="bulk"]')
to_field 'unitdate_inclusive_ssm', extract_xpath('/ead/archdesc/did/unitdate[@type="inclusive"]')
to_field 'unitdate_other_ssim', extract_xpath('/ead/archdesc/did/unitdate[not(@type)]')
to_field 'unitdates_ssm', extract_xpath('/ead/archdesc/did/unitdate')
# Records the `type` attribute of each collection-level unitdate
# (/ead/archdesc/did/unitdate), in document order. An empty string is pushed
# for a unitdate without a `type`, so there is exactly one label per unitdate.
to_field 'unitdates_labels_ssm' do |record, accumulator|
  record.xpath('/ead/archdesc/did/unitdate').each do |unitdate|
    type_attribute = unitdate.attribute('type')
    accumulator << (type_attribute ? type_attribute.value : '')
  end
end

# All top-level docs treated as 'collection' for routing / display purposes
to_field 'level_ssm' do |_record, accumulator|
Expand All @@ -104,9 +110,8 @@

to_field 'normalized_date_ssm' do |_record, accumulator, context|
accumulator << settings['date_normalizer'].constantize.new(
context.output_hash['unitdate_inclusive_ssm'],
context.output_hash['unitdate_bulk_ssim'],
context.output_hash['unitdate_other_ssim']
context.output_hash['unitdates_ssm'],
context.output_hash['unitdates_labels_ssm']
).to_s
end

Expand Down
3 changes: 3 additions & 0 deletions spec/fixtures/ead/nlm/alphaomegaalpha.xml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@
<physfacet>Compact digital disc</physfacet>
<extent altrender="materialtype spaceoccupied">3 CDs</extent>
</physdesc>
<unitdate normal="1888">1888</unitdate>
<unitdate normal="1894/1992" type="inclusive">1894-1992</unitdate>
<unitdate normal="1903/1962" type="bulk">1903-1962</unitdate>
<langmaterial id="aspace_1cf405d4520ad390ab7b5532eab3ea00">Collection materials primarily in
<language langcode="eng">English.</language></langmaterial>
<abstract id="aspace_3dcd45a7a2d2d0a1568d71906a03a4c1">
Expand Down Expand Up @@ -407,6 +409,7 @@
<unitid>MS C 271.I</unitid>
<unitdate normal="1902/1976" type="inclusive">1902-1976</unitdate>
<unitdate normal="1902/1976" type="bulk">1975-1976</unitdate>
<unitdate normal="1988">1988</unitdate>
</did>
<scopecontent id="aspace_00d4328c32ed5e54eac5c662aa45245a">
<p>Administrative records include details materials directly related to the history and
Expand Down
56 changes: 27 additions & 29 deletions spec/lib/arclight/normalized_date_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,22 @@
require 'spec_helper'

RSpec.describe Arclight::NormalizedDate do
subject(:normalized_date) { described_class.new(date_inclusive, date_bulk, date_other).to_s }
subject(:normalized_date) { described_class.new(unitdates, unitdate_labels).to_s }

let(:date_inclusive) { ['1990-2000'] }
let(:date_bulk) { '1999-2005' }
let(:date_other) { 'Undated' }
let(:unitdates) { ['1905', '1927-2000', '1982-1995'] }
let(:unitdate_labels) { ['', 'inclusive', 'bulk'] }

context 'under normal conditions' do
it 'joins dates' do
expect(normalized_date).to eq '1990-2000, Undated, bulk 1999-2005'
expect(normalized_date).to eq '1905, 1927-2000, bulk 1982-1995'
end

context 'multiple normalized dates' do
let(:date_inclusive) { %w[1990 1992] }
let(:unitdates) { %w[1990 1992] }
let(:unitdate_labels) { %w[inclusive inclusive] }

it 'are joined w/ a comma' do
expect(normalized_date).to eq '1990, 1992, Undated, bulk 1999-2005'
expect(normalized_date).to eq '1990, 1992'
end
end
end
Expand All @@ -27,61 +27,59 @@
# NOTE: This test is the only place where the code that exercises this is routable
# This has to be a multidimensional array, and the resulting XML nodes sent in are always flat
context 'multiples' do
let(:date_inclusive) { [%w[1990-2000 2001-2002 2004]] }
let(:date_bulk) { '1990-2004' }
let(:unitdates) { ['1990-2000', '2001-2002', '2004', '1990-2004'] }
let(:unitdate_labels) { ['inclusive', 'inclusive', 'INCLUSIVE', 'bulk'] }

it 'uses compressed joined years' do
expect(normalized_date).to eq '1990-2002, 2004, Undated, bulk 1990-2004'
expect(normalized_date).to eq '1990-2000, 2001-2002, 2004, bulk 1990-2004'
end
end

context 'undated' do
let(:date_bulk) { 'n.d.' }
let(:unitdates) { ['1905', '1927-2000', 'n.d.'] }

it 'do not normalized term "undated"' do
expect(normalized_date).to eq '1990-2000, Undated, bulk n.d.'
expect(normalized_date).to eq '1905, 1927-2000, bulk n.d.'
end
end

context 'circa' do
let(:date_bulk) { 'c.1995' }
context 'circa and mixed case' do
let(:unitdates) { ['1990-2000', 'c.1995'] }
let(:unitdate_labels) { ['', 'BuLk'] }

it 'do not normalized term "circa"' do
expect(normalized_date).to eq '1990-2000, Undated, bulk c.1995'
expect(normalized_date).to eq '1990-2000, BuLk c.1995'
end
end

context 'no bulk' do
let(:date_bulk) { nil }
let(:date_other) { nil }
let(:unitdate_labels) { ['', 'inclusive', ''] }

it 'uses inclusive date only' do
expect(normalized_date).to eq '1990-2000'
expect(normalized_date).to eq '1905, 1927-2000, 1982-1995'
end
end

context 'no inclusive or bulk but other' do
let(:date_inclusive) { nil }
let(:date_bulk) { nil }
let(:date_other) { 'n.d.' }
let(:unitdates) { %w[1963 1954] }
let(:unitdate_labels) { ['', ''] }

it 'uses other' do
expect(normalized_date).to eq 'n.d.'
expect(normalized_date).to eq '1963, 1954'
end
end

context 'no inclusive but bulk' do
let(:date_inclusive) { nil }

it 'uses other and bulk' do
expect(normalized_date).to eq 'Undated, bulk 1999-2005'
let(:unitdates) { %w[1963 1954-1990] }
let(:unitdate_labels) { ['bulk', ''] }
it 'does not know what to do' do
expect(normalized_date).to eq 'bulk 1963, 1954-1990'
end
end

context 'no information' do
let(:date_inclusive) { nil }
let(:date_bulk) { nil }
let(:date_other) { nil }
let(:unitdates) { nil }
let(:unitdate_labels) { nil }

it 'does not know what to do' do
expect(normalized_date).to eq ''
Expand Down
Loading