This repository was archived by the owner on Oct 12, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 135
/
Copy path_parse.js
131 lines (102 loc) · 3.73 KB
/
_parse.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// node 7.x
// built with streams for larger files
const fse = require('fs-extra');
const path = require('path');
const lineReader = require('line-reader');
const babyparse = require('babyparse');
const Promise = require('bluebird');
// Zero-based index of the CSV column that holds the intent name of a row.
const intent_column = 0;
// Zero-based index of the CSV column that holds the utterance text of a row.
const utterance_column = 1;
// Entity columns taken from the CSV header row, stored as { name, column }.
// Filled by convert() while reading the header and read by utterance() for every data row.
var entityNames = [];
// Promise-returning wrapper around line-reader's callback-style eachLine
// (Promise is the bluebird implementation required above).
var eachLine = Promise.promisify(lineReader.eachLine);
/**
 * Collects the distinct intent names of a list of utterances.
 *
 * @param {Array<{intentName: string}>} intents - Utterance objects carrying an `intentName`.
 * @returns {string[]} Each intent name once, in order of first appearance.
 */
function listOfIntents(intents) {
  const uniqueNames = [];
  intents.forEach(({ intentName }) => {
    // Only record a name the first time it is encountered.
    if (uniqueNames.indexOf(intentName) < 0) {
      uniqueNames.push(intentName);
    }
  });
  return uniqueNames;
}
/**
 * Collects the distinct entity names used by the labels of a list of utterances.
 *
 * @param {Array<{entityLabels: Array<{entityName: string}>}>} utterances - Utterance
 *   objects, each with an `entityLabels` array.
 * @returns {string[]} Each entity name once, in order of first appearance.
 */
function listOfEntities(utterances) {
  const uniqueNames = [];
  utterances.forEach((item) => {
    item.entityLabels.forEach(({ entityName }) => {
      // Only record a name the first time it is encountered.
      if (uniqueNames.indexOf(entityName) < 0) {
        uniqueNames.push(entityName);
      }
    });
  });
  return uniqueNames;
}
/**
 * Converts one CSV data row into an utterance object with labeled entities.
 *
 * The intent name and utterance text are read from the `intent_column` and
 * `utterance_column` cells. For every entity column registered in the
 * module-level `entityNames` list, the cell value is searched for in the
 * utterance text; the first match is recorded as an entity label with
 * inclusive start/end character indexes.
 *
 * @param {string} rowAsString - A single raw CSV line.
 * @returns {{text: string, intentName: string, entityLabels: Array<{entityName: string, startCharIndex: number, endCharIndex: number}>}}
 *   The utterance; all fields keep their empty defaults when the row is
 *   empty or the corresponding cells are missing.
 */
const utterance = function (rowAsString) {
  const json = {
    "text": "",
    "intentName": "",
    "entityLabels": [],
  };
  if (!rowAsString) return json;

  const row = babyparse.parse(rowAsString).data[0];
  if (!row) return json;

  // Keep the empty-string defaults when a row is shorter than expected, so
  // the text search below always operates on a string.
  if (row[intent_column] !== undefined) json.intentName = row[intent_column];
  if (row[utterance_column] !== undefined) json.text = row[utterance_column];

  // For each column heading that may be an entity, search for the element in this column in the utterance.
  entityNames.forEach((entityName) => {
    const entityToFind = row[entityName.column];
    // Skip empty cells and cells missing from rows shorter than the header.
    if (typeof entityToFind !== "string" || entityToFind === "") return;

    const startCharIndex = json.text.indexOf(entityToFind);
    if (startCharIndex === -1) return;

    json.entityLabels.push({
      "entityName": entityName.name,
      "startCharIndex": startCharIndex,
      // The end index is inclusive: it points at the last matched character.
      "endCharIndex": startCharIndex + entityToFind.length - 1,
    });
  });
  return json;
};
/**
 * Writes `contents` to `outFile` and settles once the stream has finished.
 *
 * @param {string} outFile - Path of the file to create or overwrite.
 * @param {string} contents - Text to write.
 * @returns {Promise<void>} Resolves on the stream's 'finish' event, rejects on 'error'.
 */
function writeOutFile(outFile, contents) {
  return new Promise((resolve, reject) => {
    const outStream = fse.createWriteStream(outFile, 'utf-8');
    outStream.on('error', reject);
    outStream.on('finish', resolve);
    outStream.end(contents);
  });
}

/**
 * Converts a CSV file of labeled utterances into a JSON file.
 *
 * The first line of the CSV is treated as the header: every column other than
 * the intent and utterance columns is registered as an entity column. Every
 * following line is converted with `utterance()`. The result is written to
 * `config.outFile` as `{ converted_date, utterances }`.
 *
 * @param {{inFile: string, outFile: string}} config - Input CSV path and output JSON path.
 * @returns {Promise<{intents: string[], entities: string[]}>} The distinct intent
 *   and entity names found, resolved after the output file has been written.
 * @throws Rejects with any error raised while reading, parsing or writing.
 */
const convert = async (config) => {
  // entityNames is module-level state; clear it so entity columns from a
  // previous conversion do not leak into this one.
  entityNames.length = 0;

  const utterances = [];
  let isHeaderRow = true;

  // get inFile stream
  const inFileStream = fse.createReadStream(config.inFile, 'utf-8');

  // read 1 line at a time
  await eachLine(inFileStream, (line) => {
    if (isHeaderRow) {
      isHeaderRow = false;
      // csv to baby parser object; an empty header line yields no entity columns
      const headings = babyparse.parse(line).data[0] || [];
      // populate entityType list
      headings.forEach((heading, column) => {
        if (column !== intent_column && column !== utterance_column) {
          entityNames.push({ name: heading, column });
        }
      });
      return;
    }
    // transform utterance from csv to json
    utterances.push(utterance(line));
  });

  const model = {
    intents: listOfIntents(utterances),
    entities: listOfEntities(utterances),
  };
  console.log("intents: " + JSON.stringify(model.intents));
  console.log("entities: " + JSON.stringify(model.entities));

  // The out file is only opened once parsing succeeded, and the promise waits
  // for the flush so callers can read the file as soon as convert() resolves.
  await writeOutFile(
    config.outFile,
    JSON.stringify({ "converted_date": new Date().toLocaleString(), "utterances": utterances })
  );

  console.log("parse done");
  console.log("JSON file should contain utterances. Next step is to create an app with the intents and entities it found.");

  return model;
};
module.exports = convert;