forked from RichardLitt/vesper-to-ebird
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreateChecklists.js
451 lines (410 loc) · 17.1 KB
/
createChecklists.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
#!/usr/bin/env node
const meow = require('meow')
const fs = require('fs').promises
const Papa = require('papaparse')
const codesFile = require('./codes.json')
// Command-line definition. The help text and the declared flags must agree:
// the end-of-window option is `--end` (the help previously advertised
// `--ends`), and `--config` is declared because settings() reads
// cli.flags.config.
const cli = meow(`
Usage
$ node createChecklists.js input [opts]
Arguments
input The input file
Options
--config Optional path containing configuration
--start The starting time
--end An end time
--date Specify a single date
--station Specify the station manually
--export Export results to a file
Examples
$ node createChecklists.js input.csv
$ node createChecklists.js input.csv,input2.csv
$ node createChecklists.js input.csv --start="2020/09/04 21:30:00" --end="2020/09/07 23:00:00" --export="2020-09-07 recorded"
$ node createChecklists.js input.csv --date="2020/09/08"
$ node createChecklists.js input.csv --station="NBNC"
$ node createChecklists.js --config ~/mytotallysecret/settings.json input.csv --date="2020/09/08"
`, {
  flags: {
    config: {
      type: 'string'
    },
    start: {
      type: 'string'
    },
    end: {
      type: 'string'
    },
    export: {
      type: 'string'
    },
    date: {
      type: 'string'
    },
    station: {
      type: 'string',
      default: 'msgr'
    }
  }
})
/**
 * Resolve which settings file to load. Sources are checked in this order:
 * the VESPER_TO_EBIRD_SETTINGS environment variable, the --config CLI flag,
 * and finally the default './settings.json'.
 *
 * @param {*} path Unused; kept so the existing signature does not change
 * @returns {string} The path that callers pass to require()
 */
function settings (path) {
  const fromEnvironment = process.env.VESPER_TO_EBIRD_SETTINGS
  // An unset variable is undefined rather than '', so test for a usable
  // value; comparing against '' made the two fallbacks below unreachable.
  if (fromEnvironment) {
    return fromEnvironment
  }
  if (cli.flags.config) {
    // user has provided their own settings file via cli option
    return cli.flags.config
  }
  // return the default settings file
  return './settings.json'
}
const comments = require(settings()).species
const stations = require(settings()).stations
const slashCodes = require(settings()).slashCodes
const _ = require('lodash')
const moment = require('moment')
const chalk = require('chalk')
/**
 * Read one or more CSV files and return all of their rows as one array.
 * Files are read one after another, so rows keep the order in which the
 * files were listed.
 *
 * @param {string} input Comma-separated list of CSV file paths
 * @returns {Promise<Object[]>} The parsed rows (header row used as keys)
 * @throws {TypeError} When no input string is supplied
 */
async function getData (input) {
  if (typeof input !== 'string' || input === '') {
    throw new TypeError('An input file is required')
  }
  let data = []
  // for...of yields the paths themselves; for...in would yield index strings
  // plus any other enumerable property found on the array.
  for (const path of input.split(',')) {
    const contents = await fs.readFile(path, 'utf8')
    const parsed = Papa.parse(contents, { header: true })
    // Rows with an empty season are dropped; per the original note this
    // removes the row produced by the newline at the end of the file.
    data = data.concat(parsed.data.filter(x => x.season !== ''))
  }
  return data
}
/**
 * Collect the distinct, non-empty `date` values found in the input rows.
 * When both opts.start and opts.end are set, only rows whose
 * real_detection_time lies between them contribute a date.
 *
 * @param {Object[]} input Parsed rows
 * @param {Object} [opts] May carry `start` and `end` bounds
 * @returns {Object} One key per date, each mapped to a fresh empty object
 */
function getDates (input, opts) {
  const windowed = Boolean(opts && opts.start && opts.end)
  const dates = {}
  _.forEach(input, row => {
    if (windowed) {
      const detectedAt = moment(row.real_detection_time, 'MM/DD/YY HH:mm:ss')
      // Drop any dates which don't match
      if (!detectedAt.isBetween(opts.start, opts.end)) {
        return
      }
    }
    if (row.date) {
      dates[row.date] = {}
    }
  })
  return dates
}
/**
 * Pick the effective start of a session: the requested opts.start when the
 * recording began before it, otherwise the recording's own start.
 *
 * @param {Object} recordingStart Moment at which the recording began
 * @param {Object} [opts] May carry a `start` moment
 * @returns {Object} Either opts.start or recordingStart (same object, not a copy)
 */
function getStart (recordingStart, opts) {
  const requested = opts ? opts.start : undefined
  const beganTooEarly = Boolean(requested) && recordingStart.isBefore(requested)
  return beganTooEarly ? requested : recordingStart
}
/**
 * Build the empty buckets that detections are later filed into: an object
 * keyed by date ('MM/DD/YY'), then by time of day ('HH:mm:ss'), where every
 * value is an empty array.
 *
 * For each date key, every distinct recording_start among that date's rows is
 * treated as one session. A session gets a first bucket at its exact start
 * time and then one bucket per top-of-hour until its end.
 *
 * @param {Object[]} input Rows; reads date, recording_start and recording_length
 * @param {Object} dates Object whose keys are the dates to cover (values are ignored)
 * @param {Object} [opts] Optional; `start` is passed to getStart, `end` is used as a moment
 * @returns {Object} date -> time -> [] ; dates left without buckets are removed
 */
function makeHourBuckets (input, dates, opts) {
const newDates = {}
// Copy only the keys of `dates`; each gets its own fresh object
for (var k in dates) newDates[k] = {}
// Object.keys() is taken once here, so dates added further down are not themselves iterated
_.forEach(Object.keys(newDates), date => {
// It might be more contained for testing to include a dateObj in its own
// object, instead of having to send input into this function
// Get any sessions per day. This is normally only one, if started and stopped once per night.
// TODO Add tests for this
const sessions = _.uniq(_.map(_.filter(input, e => e.date === date), entry => {
return entry.recording_start
}))
sessions.forEach(session => {
// This will break if there are multiple different recording_starts per date
// Take the first row of this date that belongs to the session
const dateObj = _.find(_.filter(input, e => e.date === date), ['recording_start', session])
if (dateObj) {
const recordingStart = moment(`${dateObj.date} ${dateObj.recording_start}`, 'MM/DD/YY HH:mm:ss')
// Effective start: the later of the recording start and opts.start (see getStart)
const start = getStart(recordingStart, opts)
// Figure out how many buckets to make
const duration = moment.duration(dateObj.recording_length)
// Clone before adding, because add() mutates the moment it is called on
let end = moment(recordingStart).add(duration)
if (opts && opts.end &&
// If it is either today, or if it is tomorrow but before noon
(opts.end.isSame(start, 'day') || (opts.end.isSame(moment(start).add(1, 'day'), 'day') && opts.end.isBefore(moment(start).add(1, 'day').hour(12))))) {
// This resets for each date, so make sure it doesn't end up making buckets all the way through to the end
end = opts.end
}
if (start.isBefore(end)) {
if (!newDates[start.format('MM/DD/YY')]) {
newDates[start.format('MM/DD/YY')] = {}
}
// Add the initial time
newDates[start.format('MM/DD/YY')][start.format('HH:mm:ss')] = []
// Make an array of the times for that night
let hourString
let dateForHour = start.format('MM/DD/YY')
// TODO I can't seem to start at 23:00. This whole thing needs help.
// i starts at the first full hour after `start` and is advanced in place by add()
for (let i = moment(start).add(1, 'hour').startOf('hour'); moment(i).isBefore(moment(end)); i.add(1, 'hours')) {
if (moment(i).isAfter(moment(start), 'day')) {
// NOTE(review): the rollover date is computed from `date`, not from `start` or `i`.
// This looks like it assumes the session starts on `date` and runs at most into
// the following day - confirm for clipped starts and multi-day recordings.
dateForHour = moment(date, 'MM/DD/YY').add(1, 'day').format('MM/DD/YY')
if (!newDates[dateForHour]) {
newDates[dateForHour] = {
'00:00:00': []
}
}
}
hourString = `${i.hours().toString().padStart(2, '0')}:00:00`
newDates[dateForHour][hourString] = []
}
}
}
})
// Remove this date again if none of its sessions produced a bucket under it
if (_.isEmpty(newDates[date])) {
delete newDates[date]
}
})
return newDates
}
/**
 * Work out how many minutes of effort an hour bucket represents.
 *
 * The recording's own start and end are taken from the first entry in the
 * bucket and narrowed by opts.start / opts.end when those fall inside the
 * recording. A bucket in the hour where the window starts or ends only
 * counts the minutes actually covered; every other bucket counts 60.
 *
 * @param {Object} buckets date -> time -> entries
 * @param {string} date Bucket date, 'MM/DD/YY'
 * @param {string} hour Bucket time key, 'HH:mm:ss'
 * @param {string[]} arr Unused; kept for the existing call signature
 * @param {number} key Unused; kept for the existing call signature
 * @param {Object} [opts] May carry `start` and `end` moments
 * @returns {number|null} Minutes covered, or null when the bucket is missing or empty
 */
function getDuration (buckets, date, hour, arr, key, opts) {
  const timestampFormat = 'MM/DD/YY HH:mm:ss'
  const bucket = buckets[date] && buckets[date][hour]
  // A missing bucket is treated like an empty one instead of throwing a TypeError
  if (!bucket || bucket.length === 0) {
    return null
  }

  const first = bucket[0]
  // 'HH' (24-hour) is the token used for recording_start everywhere else in
  // this file; the 12-hour 'hh' token was used here before.
  const recordingStart = moment(`${first.date} ${first.recording_start}`, timestampFormat)
  const [hours, minutes, seconds] = first.recording_length.split(':')
  // Clone before adding, because add() mutates the moment it is called on
  const recordingEnd = moment(recordingStart).add(moment.duration({ hours, minutes, seconds }))

  // Use the requested bounds unless they lie outside the recording itself
  let start = recordingStart
  if (opts && opts.start && !opts.start.isBefore(recordingStart)) {
    start = opts.start
  }
  let end = recordingEnd
  if (opts && opts.end && !opts.end.isAfter(recordingEnd)) {
    end = opts.end
  }

  const bucketTime = moment(`${date} ${hour}`, timestampFormat)
  const startsThisHour = bucketTime.isSame(start, 'hour')
  // If the checklist ends within this hour
  if (bucketTime.isSame(end, 'hour')) {
    // Subtract the start time if it is in the same hour,
    // or just use the amount of minutes in the hour
    return startsThisHour ? end.minutes() - start.minutes() : end.minutes()
  }
  return startsThisHour ? 60 - start.minutes() : 60
}
/**
 * Print the per-hour species tables followed by per-date totals.
 *
 * Dates and hours are walked in sorted order and empty buckets are skipped.
 * Within a table, species are ordered by the length of their label.
 *
 * @param {Object[]} input Unused here
 * @param {Object} buckets date -> time -> entries
 * @param {Object} [opts] Forwarded to getDuration
 */
function printResults (input, buckets, opts) {
  const totalCounts = {}
  const byLabelLength = (a, b) => a.length - b.length
  // "... sp." groups are shown capitalised, species codes in upper case
  const displayName = species => species.includes('sp.')
    ? species.charAt(0).toUpperCase() + species.slice(1)
    : species.toUpperCase()

  for (const date of Object.keys(buckets).sort()) {
    const filledHours = Object.keys(buckets[date]).filter(x => buckets[date][x].length !== 0)
    if (filledHours.length === 0) {
      continue
    }
    console.log('')
    console.log(chalk.blue(`Date: ${date}`))

    const sortedHours = Object.keys(buckets[date]).sort()
    sortedHours.forEach((hour, position) => {
      const entries = buckets[date][hour]
      if (entries.length === 0) {
        return
      }
      console.log(`Hour: ${chalk.green(hour.split(':').slice(0, 2).join(':'))}`)
      const duration = getDuration(buckets, date, hour, sortedHours, position, opts)
      if (duration) {
        console.log(`Duration: ${chalk.white(duration)} mins.`)
      }
      console.log('Species\tBirds\tNFCs')

      const counts = _.countBy(entries, 'species')
      for (const species of Object.keys(counts).sort(byLabelLength)) {
        const birdEstimate = estimateBirdsCalling(entries, species)

        // Accumulate the running totals for this date
        if (!totalCounts[date]) {
          totalCounts[date] = {}
        }
        const running = totalCounts[date][species]
        if (running) {
          running.NFCs += counts[species]
          running.birds += birdEstimate
        } else {
          totalCounts[date][species] = {
            NFCs: counts[species],
            birds: birdEstimate
          }
        }

        // Flag errors often caused by pressing 'N' meaning 'Next'
        if (species === 'nowa') {
          console.log(chalk.red(`NOWA:\t ${counts[species]}`))
        } else {
          console.log(`${displayName(species)}:\t${birdEstimate}\t(${counts[species]})`)
        }
      }
      console.log('')
    })
  }

  for (const date of Object.keys(totalCounts)) {
    console.log(chalk.blue(date + ' totals:'))
    const dayTotals = totalCounts[date]
    for (const species of Object.keys(dayTotals).sort(byLabelLength)) {
      const { birds, NFCs } = dayTotals[species]
      const noun = (birds === 1) ? 'bird' : 'birds'
      console.log(`${displayName(species)}: ${birds} probable ${noun}, with ${NFCs} total calls.`)
    }
    console.log('')
  }
}
/**
 * Estimate how many individual birds of one species are behind a set of
 * calls: a call that follows the previous call of the same species within
 * `windowSeconds` is assumed to come from the same bird and is not counted.
 *
 * Calls are ordered by time before the gaps are measured. Without that, any
 * out-of-order pair (for example after several input files were merged)
 * produced a negative gap and was wrongly treated as a duplicate.
 *
 * @param {Object[]} array Entries; reads `species` and `detection_time` ('HH:mm:ss')
 * @param {string} species Species label to count
 * @param {number} [windowSeconds=15] Largest gap, in seconds, still treated as the same bird
 * @returns {number} Number of calls minus those judged to be repeats
 */
function estimateBirdsCalling (array, species, windowSeconds = 15) {
  const format = 'HH:mm:ss'
  const times = _.map(
    _.filter(array, x => x.species === species),
    x => moment(x.detection_time, format)
  )
  times.sort((a, b) => a.diff(b))

  let dupes = 0
  for (let i = 1; i < times.length; i++) {
    if (times[i].diff(times[i - 1], 'seconds') <= windowSeconds) {
      dupes++
    }
  }
  return times.length - dupes
}
/**
 * Write the bucketed detections to a CSV file in eBird record format, one
 * row per species per hour bucket. The file name comes from the --export
 * flag, with '.csv' appended.
 *
 * @param {Object[]} input Unused here
 * @param {Object} buckets date -> time -> entries
 * @param {Object} opts Reads `station`; also forwarded to getDuration
 * @returns {Promise<void>} Resolves once the file has been written
 * @throws {Error} When opts.station is not present in the settings file
 */
async function exportResults (input, buckets, opts) {
  const station = stations[opts.station]
  if (!station) {
    throw new Error(`Unknown station "${opts.station}"; add it to the settings file`)
  }

  // Copy into a new object: Object.assign(slashCodes) returned slashCodes
  // itself, so every code added below leaked into the loaded settings.
  const codes = Object.assign({}, slashCodes)
  _.forEach(codesFile.data, x => {
    codes[x.Code] = x.Species
  })

  const output = []
  // Key order matters: rows are written without a header line
  const eBirdReportObj = {
    'Common Name': '', // waterfowl sp.
    Genus: '',
    Species: '',
    Number: '', // 38
    'Species Comments': '', // 1 NFC.
    'Location Name': station['Location Name'],
    Latitude: station.Latitude,
    Longitude: station.Longitude,
    Date: '', // 9/7/2020
    'Start Time': '', // 3:00 AM
    'State/Province': station.State,
    'Country Code': 'US',
    Protocol: 'P54', // Code for NFCP.
    'Number of Observers': '1',
    Duration: '', // 60
    'All observations reported?': 'N',
    'Effort Distance Miles': '',
    'Effort area acres': '',
    'Submission Comments': `${station.Kit} Calls detected using Vesper (https://github.com/HaroldMills/Vesper) unless noted. This checklist was created automatically using https://github.com/RichardLitt/vesper-to-ebird.`
  }

  Object.keys(buckets).forEach(date => {
    Object.keys(buckets[date]).forEach((hour, key, arr) => {
      const entries = buckets[date][hour]
      // Test the bucket, not the key: `hour` is a non-empty string, so the
      // previous `hour.length` check could never skip anything.
      if (entries.length === 0) {
        return
      }
      const counts = _.countBy(entries, 'species')
      Object.keys(counts).forEach(species => {
        const code = species.toUpperCase()
        const birdEstimate = estimateBirdsCalling(entries, species)
        const object = Object.assign({}, eBirdReportObj)
        object.Number = birdEstimate
        object.Date = moment(date, 'MM/DD/YY').format('M/DD/YYYY')
        object['Start Time'] = hour.split(':').slice(0, 2).join(':')
        object.Duration = getDuration(buckets, date, hour, arr, key, opts)

        let speciesComment = `${counts[species]} NFC.<br><br> Detected automatically using Vesper, available at https://github.com/HaroldMills/Vesper. Classified manually using Vesper by me. More justification for this identification available upon request; here, without researching extensively, I was able to identify the call as being very typical of this species, based on known recordings I've seen.`
        // If there is a comment from the comments page, use that
        if (comments[code] && !comments[code].WIP) {
          speciesComment = `${counts[species]} NFC.<br><br> ${comments[code].text} All NFC calls identified here follow this pattern, unless noted. If the number of identified calls does not match the NFC count, it is because the calls occurred close enough to each other to make it unclear whether or not a single bird was calling.<br><br> For more on ${code} NFC identification, consult this checklist ${comments[code].example}, or the updated page at https://birdinginvermont.com/nfc-species/${species}.`
        }
        object['Species Comments'] = speciesComment.replace(/\n/g, '<br>')

        if (species.includes('sp.')) {
          object['Common Name'] = taxonomicMatching.commonName(species)
          object['Species Comments'] = `${counts[species]} NFC.<br><br> Detected automatically in the sound file using Vesper, available at https://github.com/HaroldMills/Vesper. Classified manually by me. All tseeps and most thrush calls are given by passerine species, to the best of my knowledge; any extraneous noises were not included in this count. Any call that was within fifteen seconds of another call of the previous call was not counted in the species total in order to ensure under- and not overcounts. The actual number may vary significantly. Vesper may also fail to identify many calls, so accuracy should not be assumed in this call count. The NFC number in this comment is the total amount of calls identifed by Vesper.`
        } else if (species === 'nowa') {
          // NOTE(review): this branch leaves 'Common Name' empty for NOWA rows;
          // that is the existing behaviour - confirm it is intended.
          console.log(chalk.red(`You saw ${counts[species]} NOWA species - is that right? Or did you click N by accident?`))
        } else {
          object['Common Name'] = codes[code]
        }
        output.push(object)
      })
    })
  })

  // Strip only a trailing '.csv' and wait for the write, so failures reach the caller
  const fileName = `${cli.flags.export.replace(/\.csv$/, '')}.csv`
  await fs.writeFile(fileName, Papa.unparse(output, { header: false }), 'utf8')
}
// Maps the coarse labels Vesper can produce onto the closest eBird
// taxonomic designation; any other label is passed through untouched.
const taxonomicMatching = {
  // Labels that have a mapping, in declaration order
  species () {
    return Object.keys(this.matches)
  },
  matches: {
    '': 'passerine sp.', // Both tseep and thrush classifiers default to passerine. Some issues - swallows? Cuckoos?
    unkn: 'bird sp.', // This will default to passerine sp., based on tseep and thrush sp mostly naming these species.
    zeep: 'warbler sp.', // All zeeps are warblers.
    sparrow: 'sparrow sp.',
    peep: 'peep sp.'
  },
  // Translate a label when it is a known one, otherwise hand it back as given
  commonName (designation) {
    const known = this.species()
    return known.includes(designation) ? this.matches[designation] : designation
  }
}
/**
 * File one detection under the bucket that covers its detection time.
 * The entry's species label is normalised in place first.
 *
 * The bucket key is normally the top of the detection's hour. When the
 * detection falls in the hour in which the session (or the requested
 * window) begins, the key is that exact start time instead.
 *
 * @param {Object} entry Row being filed; its `species` is overwritten
 * @param {Object} date Moment of the detection
 * @param {Object} buckets date -> time -> entries; must already hold the target bucket
 * @param {Object} [opts] May carry a `start` moment
 */
function putEntryInBucket (entry, date, buckets, opts) {
  const keyFormat = 'HH:mm:ss'
  entry.species = taxonomicMatching.commonName(entry.species)

  const sessionStart = getStart(
    moment(`${entry.date} ${entry.recording_start}`, 'MM/DD/YY HH:mm:ss'),
    opts
  )
  // True when the requested window opens in this hour, at or after the detection
  const windowOpensThisHour = opts && opts.start &&
    opts.start.isSame(date, 'hour') && !opts.start.isBefore(date)

  let bucketKey
  if (windowOpensThisHour) {
    bucketKey = opts.start.format(keyFormat)
  } else if (date.isSame(sessionStart, 'hour')) {
    bucketKey = sessionStart.format(keyFormat)
  } else {
    // Set the hour to match the bucket name
    bucketKey = `${String(date.hour()).padStart(2, '0')}:00:00`
  }
  buckets[date.format('MM/DD/YY')][bucketKey].push(entry)
}
/**
 * CLI entry point: read the input file(s), work out the time window from the
 * flags, sort detections into hour buckets, print the results and, when
 * --export is given, write the eBird CSV.
 *
 * Exits the process with code 1 on invalid arguments.
 *
 * @returns {Promise<void>} Resolves after printing and, if requested, exporting
 */
async function run () {
  if (!cli.input[0]) {
    console.log('Please provide an input file')
    process.exit(1)
  }
  const input = await getData(cli.input[0])
  const opts = {}
  if ((!cli.flags.start && cli.flags.end) || (cli.flags.start && !cli.flags.end)) {
    console.log('You need both a start and an end date')
    process.exit(1)
  }
  if (cli.flags.date) {
    // A single date means noon that day until noon the next day
    opts.start = moment(cli.flags.date, 'YYYY/MM/DD').hour(12)
    opts.end = moment(cli.flags.date, 'YYYY/MM/DD').hour(12).add(1, 'day')
  } else if (cli.flags.start && cli.flags.end) {
    opts.start = moment(cli.flags.start, 'YYYY/MM/DD HH:mm:ss')
    opts.end = moment(cli.flags.end, 'YYYY/MM/DD HH:mm:ss')
    if (opts.end.isBefore(opts.start)) {
      console.log('The end cannot precede the beginning.')
      process.exit(1)
    }
  }
  opts.station = (cli.flags.station) ? cli.flags.station : 'msgr'
  const dates = getDates(input, opts)
  // Put all of the sightings into collections of hours
  // This is because eBird requests all checklists be under an hour
  const buckets = makeHourBuckets(input, dates, opts)
  const hasWindow = Boolean(opts.start && opts.end)
  input.forEach(entry => {
    const date = moment(entry.real_detection_time, 'MM/DD/YY HH:mm:ss')
    // With a window, only detections strictly inside it are bucketed
    if (!hasWindow || date.isBetween(opts.start, opts.end)) {
      putEntryInBucket(entry, date, buckets, opts)
    }
  })
  printResults(input, buckets, opts)
  if (cli.flags.export === '') {
    console.log('Please provide an export file name')
    process.exit(1)
  }
  if (cli.flags.export) {
    // Wait for the export so that its failures reject run() instead of being lost
    await exportResults(input, buckets, opts)
  }
}
run()
module.exports = {
makeHourBuckets,
getStart
}