Skip to content

Commit ad630fa

Browse files
committed
Extract data on double-fetch only
1 parent 2ec064e commit ad630fa

File tree

2 files changed

+13
-21
lines changed

2 files changed

+13
-21
lines changed

modules/web-discovery-project/sources/content-extractor.es

+7-3
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,13 @@ export class ContentExtractor {
9797
this.urlAnalyzer = new UrlAnalyzer(this.patterns);
9898
}
9999

100-
run(pageContent, url, addStrictQuery) {
100+
extractQuery(url) {
101+
const { found, query } = this.urlAnalyzer.parseLinks(url);
102+
if (!found) return;
103+
return query;
104+
}
105+
106+
run(pageContent, url) {
101107
function discard(reason = "") {
102108
logger.debug("No messages found for query:", query, "Reason:", reason);
103109
return {
@@ -109,8 +115,6 @@ export class ContentExtractor {
109115
const { found, type, query } = this.urlAnalyzer.parseLinks(url);
110116
if (!found) return discard("No content found.");
111117

112-
if (addStrictQuery) this.wdp.addStrictQueries(url, query);
113-
114118
const messages = this.extractMessages(pageContent, type, query, url);
115119
if (messages.length === 0) {
116120
return discard("No content found.");

modules/web-discovery-project/sources/web-discovery-project.es

+6-18
Original file line number | Diff line number | Diff line change
@@ -3375,23 +3375,15 @@ const WebDiscoveryProject = {
33753375
) {
33763376
logger.debug("[onLocationChange] isSearchEngineUrl", activeURL);
33773377
pacemaker.setTimeout(
3378-
function (url, originalURL) {
3378+
function (url) {
33793379
if (!WebDiscoveryProject) {
33803380
return;
33813381
}
3382-
getContentDocument(originalURL)
3383-
.then((doc) => {
3384-
WebDiscoveryProject.checkURL(doc, url, true);
3385-
})
3386-
.catch((e) => {
3387-
logger.info(
3388-
`Failed to get content for originalURL=${originalURL} (internalURL=${url}, details=${e})`,
3389-
);
3390-
});
3382+
const query = WebDiscoveryProject.contentExtractor.extractQuery(url);
3383+
if (query) WebDiscoveryProject.addStrictQueries(url, query)
33913384
},
33923385
WebDiscoveryProject.WAIT_TIME,
33933386
activeURL,
3394-
originalURL,
33953387
);
33963388
}
33973389

@@ -3516,10 +3508,7 @@ const WebDiscoveryProject = {
35163508
currURL,
35173509
)
35183510
) {
3519-
try {
3520-
WebDiscoveryProject.checkURL(cd, currURL, false);
3521-
} catch (e) {}
3522-
//Check active usage...
3511+
// Check active usage...
35233512
// WebDiscoveryProject.activeUsage += 1;
35243513
WebDiscoveryProject.incrActiveUsage();
35253514
}
@@ -4682,11 +4671,10 @@ const WebDiscoveryProject = {
46824671
else return null;
46834672
},
46844673

4685-
checkURL(pageContent, url, addStrictQuery) {
4674+
checkURL(pageContent, url) {
46864675
const { messages } = WebDiscoveryProject.contentExtractor.run(
46874676
pageContent,
46884677
url,
4689-
addStrictQuery,
46904678
);
46914679
for (const message of messages)
46924680
WebDiscoveryProject.telemetry({
@@ -5021,7 +5009,7 @@ const WebDiscoveryProject = {
50215009
e.qurl,
50225010
function (url, page_data, ourl, x) {
50235011
let cd = WebDiscoveryProject.docCache[url]["doc"];
5024-
WebDiscoveryProject.checkURL(cd, url, false);
5012+
WebDiscoveryProject.checkURL(cd, url);
50255013
},
50265014
function (a, b, c, d) {
50275015
logger.debug("Error aux>>>> " + d);

0 commit comments

Comments
 (0)