Skip to content

Commit

Permalink
Merge pull request #106 from tulibraries/qa
Browse files Browse the repository at this point in the history
Merge qa and master
  • Loading branch information
dkinzer committed Jun 12, 2020
2 parents 9029eed + 2963f3f commit e014882
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 60 deletions.
9 changes: 4 additions & 5 deletions lib/cob_index/macros/custom.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ module Macros
module Custom
ARCHIVE_IT_LINKS = "archive-it.org/collections/"
NOT_FULL_TEXT = /book review|publisher description|sample text|View cover art|Image|cover image|table of contents/i
GENRE_STOP_WORDS = /CD-ROM|CD-ROMs|Compact discs|Computer network resources|Databases|Electronic book|Electronic books|Electronic government information|Electronic journal|Electronic journals|Electronic newspapers|Electronic reference sources|Electronic resource|Full text|Internet resource|Internet resources|Internet videos|Online databases|Online resources|Periodical|Periodicals|Sound recordings|Streaming audio|Streaming video|Video recording|Videorecording|Web site|Web sites|Périodiques|Congrès|Ressource Internet|Périodqiue électronique/i
GENRE_STOP_WORDS = /CD-ROM|CD-ROMs|Compact discs|Computer network resources|Databases|Electronic book|Electronic books|Electronic government information|Electronic journal|Electronic journals|Electronic newspapers|Electronic reference sources|Electronic resource|Full text|Internet resource|Internet resources|Internet videos|Online databases|Online resources|Periodical|Periodicals|Sound recordings|Streaming audio|Streaming video|Video recording|Videorecording|Web site|Web sites|Périodiques|Congrès|Ressource Internet|Périodqiue électronique|Online-Publikation|Elektronische Publikation|Diccionarios|Libros electrónicos|Périodique électronique \(Descripteur de forme\)/i
SEPARATOR = " — "
A_TO_Z = ("a".."z").to_a.join("")

Expand Down Expand Up @@ -416,10 +416,9 @@ def normalize_issn

# Builds a traject-style transformation block that normalizes LCCN values
# held in the accumulator.
#
# The returned Proc takes (record, accumulator) and mutates the accumulator
# in place:
#   1. every "#" in a value is replaced by a space,
#   2. the value is passed through StdNum::LCCN.normalize,
#   3. duplicate results are dropped.
#
# The previous version first normalized the raw values, appended the
# originals and flattened before this pipeline ran. That extra pass did
# redundant work and could leave a nil in the accumulator, on which the
# "#" replacement then raised NoMethodError.
#
# NOTE(review): StdNum::LCCN.normalize appears to return nil for values it
# cannot validate, so nil entries may remain in the accumulator — confirm
# the caller discards them.
#
# @return [Proc] block suitable for passing to +to_field+ via +&normalize_lccn+
def normalize_lccn
  Proc.new do |_rec, acc|
    acc.map! do |raw|
      # "#" is swapped for a blank before handing the value to the normalizer.
      StdNum::LCCN.normalize(raw.tr("#", " "))
    end
    acc.uniq!
  end
end
Expand Down
104 changes: 50 additions & 54 deletions spec/cob_index/macros/custom_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,78 +13,74 @@

RSpec.describe "custom methods" do

describe "#four_digit_year(field):" do
describe "#four_digit_year(field)" do
context "when field is nil" do
it "returns nil" do
expect(four_digit_year nil).to eq(nil)
end
describe "#four_digit_year(field)" do
context "when field is nil" do
it "returns nil" do
expect(four_digit_year nil).to eq(nil)
end
end

context "when given an empty string" do
it "returns nil" do
expect(four_digit_year "").to eq(nil)
expect(four_digit_year "\n").to eq(nil)
expect(four_digit_year "\n\n").to eq(nil)
expect(four_digit_year " ").to eq(nil)
end
context "when given an empty string" do
it "returns nil" do
expect(four_digit_year("")).to eq(nil)
expect(four_digit_year("\n")).to eq(nil)
expect(four_digit_year("\n\n")).to eq(nil)
expect(four_digit_year(" ")).to eq(nil)
end
end

context "when contains Roman Numerals" do
it "returns nil" do
expect(four_digit_year "MCCXLV").to eq(nil)
end
context "when contains Roman Numerals" do
it "returns nil" do
expect(four_digit_year "MCCXLV").to eq(nil)
end
end

it "returns nil for [n.d.],''" do
expect(four_digit_year '[n.d.],""').to eq(nil)
end
it 'returns nil for [n.d.],""' do
expect(four_digit_year '[n.d.],""').to eq(nil)
end

it "extracts year from MCCXLV [1745],1745" do
expect(four_digit_year "MCCXLV [1745],1745").to eq("1745")
end
it "extracts year from MCCXLV [1745],1745" do
expect(four_digit_year "MCCXLV [1745],1745").to eq("1745")
end

it "extracts the first possible 4 digit numeral" do
expect(four_digit_year "1918-1966.,1918 ").to eq("1918")
end
it "extracts the first possible 4 digit numeral" do
expect(four_digit_year "1918-1966.,1918 ").to eq("1918")
end

it "extracts the first possible 4 digit numeral" do
expect(four_digit_year "'18-1966.,1918 ").to eq("1966")
expect(four_digit_year "c1993.,1993").to eq("1993")
expect(four_digit_year "©2012,2012").to eq("2012")
end
it "extracts the first possible 4 digit numeral" do
expect(four_digit_year "18-1966.,1918 ").to eq("1966")
expect(four_digit_year "c1993.,1993").to eq("1993")
expect(four_digit_year "©2012,2012").to eq("2012")
end
end

describe "#to_marc_normalized" do
describe "#flank(field)" do
let(:input) {}
subject { Traject::Macros::Custom.flank input }
context "nil" do
it "returns an empty string" do
expect(subject).to be_nil
end
describe "#flank(field)" do
let(:input) {}
subject { Traject::Macros::Custom.flank input }
context "nil" do
it "returns an empty string" do
expect(subject).to be_nil
end
end

context "empty string" do
let(:input) { "" }
it "returns an empty string" do
expect(subject).to eq("")
end
context "empty string" do
let(:input) { "" }
it "returns an empty string" do
expect(subject).to eq("")
end
end

context "non empty string" do
let(:input) { "foo" }
it "returns a flanked string" do
expect(subject).to eq("matchbeginswith foo matchendswith")
end
context "non empty string" do
let(:input) { "foo" }
it "returns a flanked string" do
expect(subject).to eq("matchbeginswith foo matchendswith")
end
end

context "a string that is flanked" do
let(:input) { "matchbeginswith foo matchendswith" }
it "does not reflank a string" do
expect(subject).to eq(input)
end
context "a string that is flanked" do
let(:input) { "matchbeginswith foo matchendswith" }
it "does not reflank a string" do
expect(subject).to eq(input)
end
end
end
Expand Down
81 changes: 81 additions & 0 deletions spec/cob_index/macros/normalize_lccn_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# frozen_string_literal: true

require "rspec"
require "cob_index/macros/marc_format_classifier"
require "cob_index/macros/custom"
require "traject/macros/marc21_semantics"

require "traject/indexer"
require "marc/record"

include Traject::Macros::MarcFormats
include Traject::Macros::Custom

# Exercises the +normalize_lccn+ macro end to end: each context supplies a
# small MARC-XML record, the indexer maps field 010 (subfields a and b) into
# "lccn_display", and the example checks the resulting value.
RSpec.describe Traject::Macros::Custom do
  # Anonymous Traject::Indexer subclass, so the field mapping configured in
  # the +before+ hook is not shared with other specs.
  let(:test_class) do
    Class.new(Traject::Indexer)
  end

  # First record parsed from the XML snippet each context defines as +record_text+.
  let(:record) { MARC::XMLReader.new(StringIO.new(record_text)).first }

  subject { test_class.new }

  describe "#normalize_lccn" do
    before do
      # Swap ENV for a plain Hash copy with SOLR_DISABLE_UPDATE_DATE_CHECK set to "false".
      # NOTE(review): the code that reads this variable is not visible here — confirm it is still needed.
      stub_const("ENV", ENV.to_hash.merge("SOLR_DISABLE_UPDATE_DATE_CHECK" => "false"))
      subject.instance_eval do
        # Extract 010 $a/$b without joining subfields, then run the macro under test.
        to_field("lccn_display", Traject::Macros::Marc21.extract_marc("010ab", separator: nil), &normalize_lccn)
        settings do
          provide "marc_source.type", "xml"
        end
      end
    end

    context "The simple case" do
      let(:record_text) { '
<record>
<datafield ind1=" " ind2=" " tag="010">
<subfield code="a">87014950</subfield>
</datafield>
</record>
' }

      it "keeps the number as is" do
        expect(subject.map_record(record)).to eq("lccn_display" => [ "87014950" ])
      end
    end

    context "LCCN includes a # symbol" do
      let(:record_text) { '
<record>
<datafield ind1=" " ind2=" " tag="010">
<subfield code="a">sn#00061556</subfield>
</datafield>
</record>
' }

      it "removes the # symbol and empty spaces" do
        expect(subject.map_record(record)).to eq("lccn_display" => [ "sn00061556" ])
      end
    end

    context "LCCN includes a / symbol" do
      let(:record_text) { '
<record>
<datafield ind1=" " ind2=" " tag="010">
<subfield code="a">25004346#//r822</subfield>
</datafield>
</record>
' }

      it "removes the / and all text to the right of it" do
        expect(subject.map_record(record)).to eq("lccn_display" => [ "25004346" ])
      end
    end
  end
end
2 changes: 1 addition & 1 deletion spec/fixtures/marc_files/genre_facet_examples.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
<datafield ind1=' ' ind2='0' tag='650'>
<subfield code='a'>Rock musicians</subfield>
<subfield code='z'>England</subfield>
<subfield code='v'>Electronic book</subfield>
<subfield code='v'>Périodique électronique (Descripteur de forme)</subfield>
</datafield>
</record>
</collection>

0 comments on commit e014882

Please sign in to comment.