This repository was archived by the owner on Dec 15, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathanalyzer.ts
170 lines (136 loc) · 4.67 KB
/
analyzer.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import { Logger } from 'probot'; // eslint-disable-line no-unused-vars
import * as request from 'request-promise-native';
import InvalidEnvironmentError from './invalid-environment-error';
/** Scores gathered per model name, holding one entry for every analyzed response that reported that model. */
type AllScores = { [modelName: string]: number[] };
/**
* Analyzes text for toxicity and other attributes.
*
* Requires the `PERSPECTIVE_KEY` environment variable to be set with a Google API key that has
* access to the Perspective API.
*/
export default class Analyzer {
  /** Perspective API URL, including the API key as a query parameter */
  private readonly apiUrl: string;

  /** Length of a chunk of content when it needs to be broken up because it is too long */
  private readonly chunkLength: number;

  /** Probot logger */
  private readonly log: Logger;

  /** Maximum length of content that can be processed by the analysis API */
  private readonly maxLength: number;

  /** Distance between the starts of consecutive chunks; shorter than a chunk so they overlap */
  private readonly sliceLength: number;

  /**
   * @param logger Logger that receives the debug output of every analysis
   * @param maxLength Longest content that is sent to the API as a single request
   * @throws InvalidEnvironmentError if the `PERSPECTIVE_KEY` environment variable is not set
   * @throws RangeError if `maxLength` is less than 1
   */
  constructor(logger: Logger, maxLength = 3000) {
    const key = process.env.PERSPECTIVE_KEY;

    if (!key) {
      throw new InvalidEnvironmentError('PERSPECTIVE_KEY');
    }

    // A limit below 1 yields a non-positive step between chunks, so `split` could never finish
    if (maxLength < 1) {
      throw new RangeError(`maxLength must be at least 1, got ${maxLength}`);
    }

    this.apiUrl = `https://commentanalyzer.googleapis.com/v1alpha1/comments:analyze?key=${key}`;
    this.log = logger;
    this.maxLength = maxLength;

    // Create some overlap in the chunks so that toxicity can't hide on borders between chunks
    const overlapLength = Math.floor(this.maxLength / 6);

    this.chunkLength = this.maxLength - overlapLength;
    this.sliceLength = this.maxLength - overlapLength * 2;
  }

  /**
   * Analyzes the event in `info` and returns a collection of scores for various sentiment models.
   *
   * If the text in `info.content` is [too long](https://github.com/atom/biohazard-alert/issues/1):
   *
   * 1. The text is split into chunks
   * 2. Each chunk is analyzed separately
   * 3. For each model, the highest score of any chunk is returned as the score for the event
   */
  async analyze(info: EventInfo): Promise<Scores> {
    this.log.debug(info, 'Analyze event');

    const responses = await this.getAnalysis(info);
    const allScores = this.extractScores(responses);

    return this.maxScores(allScores);
  }

  /**
   * Gets the raw analysis from the Perspective API.
   *
   * All chunks of `info.content` are requested concurrently; the responses are returned in chunk
   * order and the returned promise rejects if any single request fails.
   */
  async getAnalysis(info: EventInfo): Promise<Perspective.Response[]> {
    const chunks = this.split(info.content);

    return Promise.all(chunks.map(chunk => this.getChunkAnalysis(info, chunk)));
  }

  /**
   * Analyzes a chunk of the content and returns a toxicity value in the range `[0,1]`.
   */
  private async analyzeChunk(info: EventInfo, chunk: string): Promise<number> {
    const response = await this.getChunkAnalysis(info, chunk);

    return response.attributeScores.TOXICITY.summaryScore.value;
  }

  /**
   * Collects the summary score of every attribute across all `responses`, grouped by attribute
   * name.
   */
  private extractScores(responses: Perspective.Response[]): AllScores {
    const scores: AllScores = {};

    for (const response of responses) {
      const attrScores = response.attributeScores;

      for (const attr of Object.keys(attrScores)) {
        const value = attrScores[attr].summaryScore.value;

        if (scores[attr]) {
          scores[attr].push(value);
        } else {
          scores[attr] = [value];
        }
      }
    }

    return scores;
  }

  /**
   * Gets the raw analysis of a chunk of content.
   *
   * @param info Event the chunk belongs to; supplies the privacy flag and the log context
   * @param chunk Text to analyze
   */
  private async getChunkAnalysis(
    info: EventInfo,
    chunk: string
  ): Promise<Perspective.Response> {
    const apiRequest = {
      url: this.apiUrl,
      body: {
        comment: {
          text: chunk
        },
        // Sets the API's `doNotStore` flag when the content comes from a private repository
        doNotStore: info.isRepoPrivate,
        requestedAttributes: {
          TOXICITY: {},
          SEVERE_TOXICITY: {},
          IDENTITY_ATTACK: {},
          INSULT: {},
          PROFANITY: {},
          THREAT: {},
          SEXUALLY_EXPLICIT: {},
          FLIRTATION: {}
        }
      },
      json: true
    };

    // Log only the body: the URL embeds the API key and must not end up in the logs
    this.log.debug(apiRequest.body, `Call Perspective API on ${info.source}`);

    const response = (await request.post(apiRequest)) as Perspective.Response;

    this.log.debug(response, `Perspective API response for ${info.source}`);

    return response;
  }

  /**
   * Reduces the per-chunk scores of every attribute to the single highest score.
   */
  private maxScores(allScores: AllScores): Scores {
    const scores: Scores = {};

    for (const attr of Object.keys(allScores)) {
      scores[attr] = Math.max(...allScores[attr]);
    }

    return scores;
  }

  /**
   * Splits `content` into an array of strings each short enough to be processed by the API.
   *
   * Content no longer than `maxLength` is returned as a single chunk. Longer content is cut into
   * chunks of `chunkLength` characters that start `sliceLength` characters apart, so consecutive
   * chunks overlap; whatever remains once it fits within `maxLength` becomes the last chunk.
   */
  private split(content: string): string[] {
    const chunks: string[] = [];
    let remaining = content;

    while (remaining.length > this.maxLength) {
      chunks.push(remaining.slice(0, this.chunkLength));

      // Keep everything through the end of the text; an end index of -1 would silently drop the
      // final character on every pass
      remaining = remaining.slice(this.sliceLength);
    }

    chunks.push(remaining);

    return chunks;
  }
}