forked from JabRef/jabref
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathJournalAbbreviationRepository.java
177 lines (154 loc) · 7.79 KB
/
JournalAbbreviationRepository.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
package org.jabref.logic.journals;
import java.nio.file.Path;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.h2.mvstore.MVMap;
import org.h2.mvstore.MVStore;
/**
 * A repository for all journal abbreviations, including add and find methods.
 * <p>
 * Two kinds of entries are kept:
 * <ul>
 *     <li>the built-in list, indexed in four maps (full name, default abbreviation, dotless abbreviation, shortest
 *     unique abbreviation) that are queried with exact, case-sensitive keys, and</li>
 *     <li>custom abbreviations, which are compared case-insensitively and are always consulted before the
 *     built-in list.</li>
 * </ul>
 */
public class JournalAbbreviationRepository {
    static final Pattern QUESTION_MARK = Pattern.compile("\\?");

    private final Map<String, Abbreviation> fullToAbbreviationObject = new HashMap<>();
    private final Map<String, Abbreviation> abbreviationToAbbreviationObject = new HashMap<>();
    private final Map<String, Abbreviation> dotlessToAbbreviationObject = new HashMap<>();
    private final Map<String, Abbreviation> shortestUniqueToAbbreviationObject = new HashMap<>();
    private final TreeSet<Abbreviation> customAbbreviations = new TreeSet<>();

    /**
     * Initializes the internal data based on the abbreviations found in the given MV file.
     * <p>
     * The store is opened read-only and closed again before the constructor returns; every entry of its
     * {@code "FullToAbbreviation"} map is copied into the four in-memory indexes.
     *
     * @param journalList path of the MV store file holding the built-in abbreviation list
     */
    public JournalAbbreviationRepository(Path journalList) {
        String storeFile = journalList.toAbsolutePath().toString();
        try (MVStore store = new MVStore.Builder().readOnly().fileName(storeFile).open()) {
            MVMap<String, Abbreviation> storedAbbreviations = store.openMap("FullToAbbreviation");
            storedAbbreviations.forEach((name, stored) -> {
                String abbreviationString = stored.getAbbreviation();
                String shortestUniqueAbbreviation = stored.getShortestUniqueAbbreviation();

                // The stored value is re-created rather than reused: only the map key (full name), the
                // abbreviation, and the shortest unique abbreviation are carried over.
                Abbreviation loaded = new Abbreviation(name, abbreviationString, shortestUniqueAbbreviation);

                fullToAbbreviationObject.put(name, loaded);
                abbreviationToAbbreviationObject.put(abbreviationString, loaded);
                dotlessToAbbreviationObject.put(loaded.getDotlessAbbreviation(), loaded);
                shortestUniqueToAbbreviationObject.put(shortestUniqueAbbreviation, loaded);
            });
        }
    }

    /**
     * Initializes the repository with demonstration data. Used if no abbreviation file is found.
     * <p>
     * The only entry is "Demonstration" with abbreviation "Demo" and shortest unique abbreviation "Dem".
     */
    public JournalAbbreviationRepository() {
        Abbreviation demonstration = new Abbreviation("Demonstration", "Demo", "Dem");

        fullToAbbreviationObject.put("Demonstration", demonstration);
        abbreviationToAbbreviationObject.put("Demo", demonstration);
        dotlessToAbbreviationObject.put("Demo", demonstration);
        shortestUniqueToAbbreviationObject.put("Dem", demonstration);
    }

    /**
     * Checks whether {@code name} equals, ignoring case, the full name or any abbreviated form of the given
     * abbreviation.
     */
    private static boolean isMatched(String name, Abbreviation abbreviation) {
        return name.equalsIgnoreCase(abbreviation.getName())
                || name.equalsIgnoreCase(abbreviation.getAbbreviation())
                || name.equalsIgnoreCase(abbreviation.getDotlessAbbreviation())
                || name.equalsIgnoreCase(abbreviation.getShortestUniqueAbbreviation());
    }

    /**
     * Checks whether {@code name} equals, ignoring case, one of the abbreviated forms of the given abbreviation
     * without also being its full name.
     */
    private static boolean isMatchedAbbreviated(String name, Abbreviation abbreviation) {
        if (name.equalsIgnoreCase(abbreviation.getName())) {
            // The text is (also) the expanded form, so it does not count as abbreviated.
            return false;
        }
        return name.equalsIgnoreCase(abbreviation.getAbbreviation())
                || name.equalsIgnoreCase(abbreviation.getDotlessAbbreviation())
                || name.equalsIgnoreCase(abbreviation.getShortestUniqueAbbreviation());
    }

    /**
     * Checks whether {@code journal} is a key of the given built-in index and differs (ignoring case) from the
     * full name of the entry it maps to. This mirrors the rule {@link #isMatchedAbbreviated} applies to custom
     * abbreviations.
     */
    private static boolean isStrictAbbreviation(Map<String, Abbreviation> index, String journal) {
        Abbreviation hit = index.get(journal);
        return (hit != null) && !journal.equalsIgnoreCase(hit.getName());
    }

    /**
     * Returns true if the given text contains a question mark; such names are never looked up.
     */
    private static boolean containsQuestionMark(String journalName) {
        Matcher questionMark = QUESTION_MARK.matcher(journalName);
        return questionMark.find();
    }

    /**
     * Cleans up a journal name for lookup: trims surrounding whitespace and unescapes ampersands
     * (backslash followed by {@code &} becomes a plain {@code &}).
     */
    private static String normalize(String journalName) {
        // Literal replacement: no regular expression is needed to match the two characters "\&".
        return journalName.trim().replace("\\&", "&");
    }

    /**
     * Returns true if the given journal name is contained in the list either in its full form
     * (e.g., Physical Review Letters) or its abbreviated form (e.g., Phys. Rev. Lett.).
     *
     * @param journalName the journal name to look up; names containing a question mark are never known
     * @return true if a custom abbreviation matches (ignoring case) or a built-in index contains the cleaned-up name
     */
    public boolean isKnownName(String journalName) {
        if (containsQuestionMark(journalName)) {
            return false;
        }
        String journal = normalize(journalName);
        return customAbbreviations.stream().anyMatch(abbreviation -> isMatched(journal, abbreviation))
                || fullToAbbreviationObject.containsKey(journal)
                || abbreviationToAbbreviationObject.containsKey(journal)
                || dotlessToAbbreviationObject.containsKey(journal)
                || shortestUniqueToAbbreviationObject.containsKey(journal);
    }

    /**
     * Returns true if the given journal name is in its abbreviated form (e.g. Phys. Rev. Lett.). The test is strict,
     * i.e., journals whose abbreviation is the same as the full name are not considered. The strict rule is
     * applied to custom abbreviations and to the built-in list alike.
     *
     * @param journalName the journal name to test; names containing a question mark are never abbreviated names
     * @return true if the cleaned-up name is an abbreviated form of an entry whose full name differs from it
     */
    public boolean isAbbreviatedName(String journalName) {
        if (containsQuestionMark(journalName)) {
            return false;
        }
        String journal = normalize(journalName);
        return customAbbreviations.stream().anyMatch(abbreviation -> isMatchedAbbreviated(journal, abbreviation))
                || isStrictAbbreviation(abbreviationToAbbreviationObject, journal)
                || isStrictAbbreviation(dotlessToAbbreviationObject, journal)
                || isStrictAbbreviation(shortestUniqueToAbbreviationObject, journal);
    }

    /**
     * Attempts to get the abbreviation of the journal given.
     * <p>
     * Custom abbreviations win over the built-in list. Within the built-in list the indexes are tried in the
     * order full name, default abbreviation, dotless abbreviation, shortest unique abbreviation.
     *
     * @param input The journal name (either full name or abbreviated name).
     * @return the first matching abbreviation, or an empty Optional if nothing matches
     */
    public Optional<Abbreviation> get(String input) {
        // Clean up input: trim and unescape ampersand
        String journal = normalize(input);

        return customAbbreviations.stream()
                                  .filter(abbreviation -> isMatched(journal, abbreviation))
                                  .findFirst()
                                  .or(() -> Optional.ofNullable(fullToAbbreviationObject.get(journal)))
                                  .or(() -> Optional.ofNullable(abbreviationToAbbreviationObject.get(journal)))
                                  .or(() -> Optional.ofNullable(dotlessToAbbreviationObject.get(journal)))
                                  .or(() -> Optional.ofNullable(shortestUniqueToAbbreviationObject.get(journal)));
    }

    /**
     * Adds a single custom abbreviation.
     *
     * @param abbreviation the abbreviation to add; must not be null
     * @throws NullPointerException if {@code abbreviation} is null
     */
    public void addCustomAbbreviation(Abbreviation abbreviation) {
        Objects.requireNonNull(abbreviation);

        // We do NOT want to keep duplicates
        // The sorted set silently ignores an element that compares as equal to one it already holds
        // What is a duplicate? An abbreviation is NOT the same if any field is NOT equal (e.g., if the shortest
        // unique differs, the abbreviation is NOT the same) - NOTE(review): this relies on the ordering defined
        // by Abbreviation, which is not visible here
        customAbbreviations.add(abbreviation);
    }

    /**
     * Returns the custom abbreviations. This is the internal sorted set itself, not a copy, so changes made
     * through the returned collection affect this repository.
     */
    public Collection<Abbreviation> getCustomAbbreviations() {
        return customAbbreviations;
    }

    /**
     * Adds every abbreviation of the given collection via {@link #addCustomAbbreviation(Abbreviation)}.
     *
     * @param abbreviationsToAdd the abbreviations to add; null elements are rejected with a NullPointerException
     */
    public void addCustomAbbreviations(Collection<Abbreviation> abbreviationsToAdd) {
        abbreviationsToAdd.forEach(this::addCustomAbbreviation);
    }

    /**
     * Looks up the journal and passes the trimmed input to {@code Abbreviation#getNext} of the match.
     *
     * @param text the journal name (either full name or abbreviated name)
     * @return the result of {@code getNext}, or an empty Optional if the journal is unknown
     */
    public Optional<String> getNextAbbreviation(String text) {
        return get(text).map(abbreviation -> abbreviation.getNext(text.trim()));
    }

    /**
     * @param text the journal name (either full name or abbreviated name)
     * @return the default abbreviation of the matching entry, or an empty Optional if the journal is unknown
     */
    public Optional<String> getDefaultAbbreviation(String text) {
        return get(text).map(Abbreviation::getAbbreviation);
    }

    /**
     * @param text the journal name (either full name or abbreviated name)
     * @return the dotless abbreviation of the matching entry, or an empty Optional if the journal is unknown
     */
    public Optional<String> getDotless(String text) {
        return get(text).map(Abbreviation::getDotlessAbbreviation);
    }

    /**
     * @param text the journal name (either full name or abbreviated name)
     * @return the shortest unique abbreviation of the matching entry, or an empty Optional if the journal is unknown
     */
    public Optional<String> getShortestUniqueAbbreviation(String text) {
        return get(text).map(Abbreviation::getShortestUniqueAbbreviation);
    }

    /**
     * Returns the full names of the built-in list (custom abbreviations are not included). The result is the
     * key-set view of the internal map, not a copy.
     */
    public Set<String> getFullNames() {
        return fullToAbbreviationObject.keySet();
    }

    /**
     * Returns all abbreviations of the built-in list (custom abbreviations are not included). The result is the
     * values view of the internal map, not a copy.
     */
    public Collection<Abbreviation> getAllLoaded() {
        return fullToAbbreviationObject.values();
    }
}