Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DOP-5018: Add functionality to delete stale properties and documents from Search database #6

Merged
merged 18 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions search-manifest/src/generateManifest/document.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { JSONPath } from "jsonpath-plus";
import { Facet } from "./createFacets";
import { ManifestEntry } from "./manifestEntry";
import { BSON } from "bson";

export class Document {
//Return indexing data from a page's JSON-formatted AST for search purposes
Expand All @@ -18,7 +19,7 @@ export class Document {
noIndex: any;
reasons: any;

constructor(doc: any) {
constructor(doc: BSON.Document) {
this.tree = doc;

//find metadata
Expand Down Expand Up @@ -63,8 +64,8 @@ export class Document {
if ("robots" in val && (val.robots == "None" || val.robots == "noindex"))
robots = false;

keywords = val?.keywords ?? null;
description = val?.description ?? null;
keywords = val?.keywords;
description = val?.description;
}

return [robots, keywords, description];
Expand Down Expand Up @@ -203,7 +204,7 @@ export class Document {

if (this.noIndex) {
console.info("Refusing to index");
return null;
return;
}

const document = new ManifestEntry({
Expand Down
6 changes: 2 additions & 4 deletions search-manifest/src/generateManifest/manifest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@ export class Manifest {
this.global = includeInGlobalSearch;
}

addDocument(document: ManifestEntry | null) {
addDocument(document: ManifestEntry) {
//Add a document to the manifest
if (document) {
this.documents.push(document);
}
this.documents.push(document);
}

export() {
Expand Down
6 changes: 5 additions & 1 deletion search-manifest/src/generateManifest/manifestEntry.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { Facet } from "./createFacets";

//change this to an interface
export class ManifestEntry {
slug: string;
Expand All @@ -8,8 +10,10 @@ export class ManifestEntry {
code: { lang: string | null; value: string }[];
preview?: string | null;
tags: string | null;
facets: any;
//TODO: add type
facets: Facet;

// TODO: add type for entry
constructor(entry: any) {
this.slug = entry.slug;
this.title = entry.title;
Expand Down
38 changes: 23 additions & 15 deletions search-manifest/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { uploadManifest } from "./uploadToAtlas/uploadManifest";

import { readdir, readFileSync } from "fs";
import getProperties from "./uploadToAtlas/getProperties";
import { teardown } from "./uploadToAtlas/searchConnector";

const readdirAsync = promisify(readdir);

Expand Down Expand Up @@ -37,8 +38,8 @@ export const generateManifest = async () => {
//put file into Document object
//export Document object
const processedDoc = new Document(decoded).exportAsManifestDocument();
//add document to manifest object
manifest.addDocument(processedDoc);
//add document to manifest object if it was able to be indexed
if (processedDoc) manifest.addDocument(processedDoc);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can put the teardown function call at the end of the integration callback function, somewhere here. Basically, you can add the teardown at the end of the addBuildEventHandler callback.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussed async, postponing to solve in later ticket

}
return manifest;
};
Expand All @@ -56,26 +57,33 @@ integration.addBuildEventHandler(
const manifest = await generateManifest();

console.log("=========== finished generating manifests ================");
const {
searchProperty,
url,
includeInGlobalSearch,
}: { searchProperty: string; url: string; includeInGlobalSearch: boolean } =
await getProperties(branch);
//TODO: create an interface for this return type

manifest.url = url;
manifest.global = includeInGlobalSearch;
try {
const {
searchProperty,
url,
includeInGlobalSearch,
}: {
searchProperty: string;
url: string;
includeInGlobalSearch: boolean;
} = await getProperties(branch);

//TODO: upload manifests to S3
manifest.url = url;
manifest.global = includeInGlobalSearch;

//uploads manifests to atlas
console.log("=========== Uploading Manifests =================");
try {
//TODO: upload manifests to S3

//uploads manifests to atlas
console.log("=========== Uploading Manifests =================");
await uploadManifest(manifest, searchProperty);
console.log("=========== Manifests uploaded to Atlas =================");
} catch (e) {
console.log("Manifest could not be uploaded", e);
} finally {
teardown();
}
console.log("=========== Manifests uploaded to Atlas =================");
}
);

Expand Down
41 changes: 0 additions & 41 deletions search-manifest/src/uploadToAtlas/deleteStale.ts

This file was deleted.

16 changes: 16 additions & 0 deletions search-manifest/src/uploadToAtlas/deleteStaleProperties.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import { db, teardown } from "./searchConnector";
import { DatabaseDocument } from "./types";

// mongodb+srv connection string assembled from the MONGO_ATLAS_USERNAME,
// MONGO_ATLAS_PASSWORD and MONGO_ATLAS_SEARCH_HOST environment variables.
// NOTE: an unset variable is interpolated as the literal text "undefined",
// so a missing value is not detected here.
const ATLAS_SEARCH_URI = `mongodb+srv://${process.env.MONGO_ATLAS_USERNAME}:${process.env.MONGO_ATLAS_PASSWORD}@${process.env.MONGO_ATLAS_SEARCH_HOST}/?retryWrites=true&w=majority`;

//TODO: change these teamwide env vars in Netlify UI when ready to move to prod
// Database name read from MONGO_ATLAS_SEARCH_DB_NAME; like the URI above, an
// unset variable becomes the string "undefined" rather than raising an error.
const SEARCH_DB_NAME = `${process.env.MONGO_ATLAS_SEARCH_DB_NAME}`;

/**
 * Deletes every document in the "documents" collection whose `searchProperty`
 * field contains the given string.
 *
 * The match is a literal substring match: the input is escaped before being
 * used as a `$regex` pattern, so characters such as `.` or `+` in a property
 * or version name (e.g. `docs-v1.0`) cannot act as regex wildcards and remove
 * documents belonging to other properties.
 *
 * @param searchProperty - Text to look for in each document's `searchProperty`.
 * @returns The result of the `deleteMany` call.
 * @throws Error if `searchProperty` is empty, because an empty pattern matches
 *   every document and would clear the whole collection.
 */
export const deleteStaleProperties = async (searchProperty: string) => {
  if (!searchProperty) {
    throw new Error(
      "deleteStaleProperties requires a non-empty searchProperty; refusing to delete all documents"
    );
  }

  const dbSession = await db({ uri: ATLAS_SEARCH_URI, dbName: SEARCH_DB_NAME });
  const documentsColl = dbSession.collection<DatabaseDocument>("documents");
  console.debug(`Removing old documents`);

  // Escape regex metacharacters so the value is matched literally.
  const escapedProperty = searchProperty.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const query = { searchProperty: { $regex: escapedProperty } };
  const status = await documentsColl.deleteMany(query);
  return status;
};
70 changes: 34 additions & 36 deletions search-manifest/src/uploadToAtlas/getProperties.ts
Original file line number Diff line number Diff line change
@@ -1,24 +1,22 @@
import { Collection, Db, Document, WithId } from "mongodb";
import { db, teardown } from "./searchConnector";
import {
BranchEntry,
DatabaseDocument,
DocsetsDocument,
ReposBranchesDocument,
} from "./types";
import { assertTrailingSlash } from "./utils";
import { deleteStaleProperties } from "./deleteStaleProperties";

// helper function to find the associated branch
const getBranch = (branches: any, branchName: string) => {
export const getBranch = (branches: Array<BranchEntry>, branchName: string) => {
for (const branchObj of branches) {
if (branchObj.gitBranchName.toLowerCase() == branchName.toLowerCase()) {
return branchObj;
return { ...branchObj };
}
}
return undefined;
};

export const _getBranch = (branches: any, branchName: string) => {
return getBranch(branches, branchName);
throw new Error(`Branch ${branchName} not found in branches object`);
};

const getProperties = async (branchName: string) => {
Expand All @@ -35,7 +33,7 @@ const getProperties = async (branchName: string) => {

let dbSession: Db;
let repos_branches: Collection<DatabaseDocument>;
let docsets;
let docsets: Collection<DatabaseDocument>;
let url: string = "";
let searchProperty: string = "";
let includeInGlobalSearch: boolean = false;
Expand All @@ -45,11 +43,10 @@ const getProperties = async (branchName: string) => {

try {
//connect to database and get repos_branches, docsets collections
dbSession = await db(ATLAS_CLUSTER0_URI, SNOOTY_DB_NAME);
dbSession = await db({ uri: ATLAS_CLUSTER0_URI, dbName: SNOOTY_DB_NAME });
repos_branches = dbSession.collection<DatabaseDocument>("repos_branches");
docsets = dbSession.collection<DatabaseDocument>("docsets");
} catch (e) {
console.log("issue starting session for Snooty Pool Database", e);
throw new Error(`issue starting session for Snooty Pool Database ${e}`);
}

Expand Down Expand Up @@ -83,16 +80,24 @@ const getProperties = async (branchName: string) => {
const { project } = repo;

try {
const {
urlSlug,
gitBranchName,
isStableBranch,
}: {
urlSlug: string;
gitBranchName: string;
isStableBranch: boolean;
active: boolean;
} = getBranch(repo.branches, branchName);
const docsetsQuery = { project: { $eq: project } };
docsetRepo = await docsets.findOne<DocsetsDocument>(docsetsQuery);
if (docsetRepo) {
//TODO: change based on environment
url = assertTrailingSlash(
docsetRepo.url?.dotcomprd + docsetRepo.prefix.dotcomprd
);
}
} catch (e) {
console.error(`Error while getting docsets entry in Atlas ${e}`);
throw e;
}

try {
const { isStableBranch, gitBranchName, active, urlSlug } = getBranch(
repo.branches,
branchName
);
includeInGlobalSearch = isStableBranch;
version = urlSlug || gitBranchName;
searchProperty = `${repo.search?.categoryName ?? project}-${version}`;
Expand All @@ -102,30 +107,23 @@ const getProperties = async (branchName: string) => {
!repo.prodDeployable ||
!repo.search?.categoryTitle
) {
//TODO: deletestaleproperties here potentially instead of throwing or returning
// deletestaleproperties here for ALL manifests beginning with this repo? or just for this project-version searchproperty
await deleteStaleProperties(project);
throw new Error(
`Search manifest should not be generated for repo ${REPO_NAME}`
`Search manifest should not be generated for repo ${REPO_NAME}. Removing all associated manifests`
);
}
} catch (e) {
console.error(`Error`, e);
throw e;
}

try {
const docsetsQuery = { project: { $eq: project } };
docsetRepo = await docsets.findOne<DocsetsDocument>(docsetsQuery);
if (docsetRepo) {
//TODO: change based on environment
url = assertTrailingSlash(
docsetRepo.url?.dotcomprd + docsetRepo.prefix.dotcomprd
if (!active) {
deleteStaleProperties(searchProperty);
throw new Error(
`Search manifest should not be generated for inactive version ${version} of repo ${REPO_NAME}. Removing all associated manifests`
);
}
} catch (e) {
console.error(`Error while getting docsets entry in Atlas ${e}`);
console.error(`Error`, e);
throw e;
}
await teardown();

return { searchProperty, url, includeInGlobalSearch };
};

Expand Down
16 changes: 5 additions & 11 deletions search-manifest/src/uploadToAtlas/searchConnector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,6 @@ import * as mongodb from "mongodb";

// We should only ever have one client active at a time.

//TODO: teardown after no longer need client
// export const teardown = async () => {
// await client.close();
// };

// cached db object, so we can handle initial connection process once if uninitialized
let dbInstance: Db;
let client: mongodb.MongoClient;
Expand All @@ -17,16 +12,15 @@ export const teardown = async () => {
};

// Handles memoization of db object, and initial connection logic if it needs to be initialized
export const db = async (uri: string, db_name: string) => {
export const db = async ({ uri, dbName }: { uri: string; dbName: string }) => {
client = new mongodb.MongoClient(uri);
try {
await client.connect();
dbInstance = client.db(db_name);
dbInstance = client.db(dbName);
} catch (error) {
console.error(
`Error at db client connection: ${error} for uri ${uri} and db name ${db_name}`
);
throw error;
const err = `Error at db client connection: ${error} for uri ${uri} and db name ${dbName}`;
console.error(err);
throw err;
}
return dbInstance;
};
Loading