diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..f6c0080 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,13 @@ + +root = true + +[*] +indent_style = tab +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +[*.yml] +indent_style = space +indent_size = 2 \ No newline at end of file diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..94f480d --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c0a3ba7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +/node_modules +/package-lock.json +/yarn-lock.json +/pnpm-lock.yaml +/dist \ No newline at end of file diff --git a/.npmrc b/.npmrc new file mode 100644 index 0000000..9cf9495 --- /dev/null +++ b/.npmrc @@ -0,0 +1 @@ +package-lock=false \ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..65b89f0 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,17 @@ +language: node_js + +node_js: + - "12" + - "10" + +before_install: + - npm install -g pnpm + +install: + - pnpm install + +script: + - pnpm run test + +notifications: + email: false \ No newline at end of file diff --git a/__tests__/index.ts b/__tests__/index.ts new file mode 100644 index 0000000..24ea049 --- /dev/null +++ b/__tests__/index.ts @@ -0,0 +1,12 @@ +import {fetchMetaData} from '../src'; + +test('check meta-data', async () => { + const response = await fetchMetaData('https://microtip.now.sh'); + const keys = Object.keys(response); + + expect(keys).toHaveLength(4); + expect(response).toHaveProperty('basic_metadata'); + expect(response).toHaveProperty('opengraph'); + expect(response).toHaveProperty('opengraph_social'); + expect(response).toHaveProperty('favicons'); +}); diff --git a/__tests__/util.ts b/__tests__/util.ts new file mode 100644 index 0000000..72dc3e5 --- /dev/null +++ b/__tests__/util.ts @@ -0,0 
+1,13 @@ +import {createValidUri} from '../src/util'; + +describe('URI validation', () => { + test('creates valid URI', () => { + const result: string = createValidUri('https://github.com/', '/rocktimsaikia'); + expect(result).toBe('https://github.com/rocktimsaikia'); + }); + + test('returns the path', () => { + const result: string = createValidUri('https://github.com/', 'https://github.com/rocktimsaikia'); + expect(result).toBe('https://github.com/rocktimsaikia'); + }); +}); diff --git a/example/index.js b/example/index.js new file mode 100644 index 0000000..ceedb99 --- /dev/null +++ b/example/index.js @@ -0,0 +1,9 @@ +const {fetchMetaData} = require('../dist'); + +(async () => { + const result = await fetchMetaData('https://hoppscotch.io/', { + userAgent: 'Rocktim', + fromEmail: 'srocktim61@gmail.com' + }); + console.log(result); +})(); diff --git a/example/index.ts b/example/index.ts new file mode 100644 index 0000000..7e9c394 --- /dev/null +++ b/example/index.ts @@ -0,0 +1,11 @@ +// Example script using url-fetch +'use strict'; +import {fetchMetaData} from '../src'; + +(async () => { + const result = await fetchMetaData('https://microtip.now.sh', { + userAgent: 'Rocktim', + fromEmail: 'srocktim61@gmail.com' + }); + console.log(result); +})(); diff --git a/license b/license new file mode 100644 index 0000000..a922934 --- /dev/null +++ b/license @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Rocktim Saikia + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the 
Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/package.json b/package.json new file mode 100644 index 0000000..db9add9 --- /dev/null +++ b/package.json @@ -0,0 +1,76 @@ +{ + "name": "meta-fetch", + "version": "1.0.0", + "description": "Tiny URL meta-data fetcher that scraps the meta-data of a given URL string", + "keywords": [ + "meta-man", + "url-metadata", + "url-metadata-scrapper", + "metadata-scrapper", + "website-metadata", + "website-metadata-scrapper", + "metadata", + "scrapper" + ], + "bugs": { + "url": "https://github.com/rocktimsaikia/meta-fetch/issues" + }, + "license": "MIT", + "author": { + "name": "Rocktim Saikia", + "email": "rocktimthedev@gmail.com", + "url": "https://rocktim.xyz" + }, + "main": "dist/index.js", + "module": "dist/index.esm.js", + "types": "dist/index.d.ts", + "files": [ + "dist/**/*" + ], + "scripts": { + "prepublishOnly": "pnpm run prebuild && pnpm run test:dev && pnpm run build", + "prebuild": "rimraf dist", + "build": "pnpm run build:esm && pnpm run build:cjs", + "build:cjs": "tsc --module commonjs", + "build:esm": "tsc --module esnext && cpy dist/index.js dist --rename index.esm.js", + "dev": "ts-node example/index.ts", + "dev:js": "node --trace-warnings example/index.js", + "test:dev": "xo && pnpm run test", + "test": "jest --verbose" + }, + "xo": { + "extensions": [ + "ts" + ], + "ignores": [ + "__tests__/**/*.ts" + ], + "rules": { + "import/no-anonymous-default-export": 0, + "quote-props": 0 + } + }, + "jest": { + "preset": "ts-jest" + }, + "dependencies": { + "cheerio": "^1.0.0-rc.3", + 
"node-fetch": "^2.6.1" + }, + "devDependencies": { + "@types/cheerio": "^0.22.21", + "@types/jsdom": "^16.2.4", + "@types/node": "^14.6.4", + "@types/node-fetch": "^2.5.7", + "cpy-cli": "^3.1.1", + "jest": "^26.4.2", + "rimraf": "^3.0.2", + "ts-jest": "^26.3.0", + "ts-node": "^9.0.0", + "typescript": "^4.0.2", + "xo": "^0.33.1" + }, + "engines": { + "node": ">=12" + } +} \ No newline at end of file diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..8135bc5 --- /dev/null +++ b/readme.md @@ -0,0 +1,104 @@ +# meta-fetch :mag_right: + +> Tiny URL meta-data fetcher that scraps the meta-data of a given `URL` string. + +![Travis (.com) branch](https://img.shields.io/travis/com/rocktimsaikia/meta-fetcher/master) +[![XO code style](https://img.shields.io/badge/code_style-XO-5ed9c7.svg)](https://github.com/xojs/xo) + +Under the hood it uses [node-fetch](https://github.com/node-fetch/node-fetch) to fetch the metadata, parses it and returns it as json object. + +## Install + +```sh +npm install meta-fetch +``` + +## Basic Usage +```js +const {fetchMetaData} = require('meta-fetch'); + +(async () => { + const result = await fetchMetaData('https://hoppscotch.io/'); + console.log(result); + + /* + { + basic_metadata: { + website: 'https://hoppscotch.io/', + title: 'Hoppscotch • A free, fast and beautiful API request builder', + description: 'A free, fast and beautiful API request builder' + }, + opengraph: { + 'og:image': 'https://hoppscotch.io/banner.jpg', + 'og:type': 'website', + 'og:title': 'Hoppscotch', + 'og:site_name': 'Hoppscotch', + 'og:description': 'A free, fast and beautiful API request builder', + 'og:url': 'https://hoppscotch.io/' + }, + opengraph_social: { + 'twitter:card': 'summary_large_image', + 'twitter:site': '@liyasthomas', + 'twitter:creator': '@liyasthomas' + }, + favicons: [ + 'https://hoppscotch.io/icon.png', + 'https://hoppscotch.io/icon.png', + 'https://hoppscotch.io/_nuxt/icons/icon_64x64.9834b3.png' + ] + } + */ +})(); + +``` + +## 
Advanced Usage (with options) +You can optionally pass the `userAgent` and `fromEmail` options; they are sent as the `User-Agent` and `From` request headers while fetching the meta-data. + +```js +const {fetchMetaData} = require('meta-fetch'); + +(async () => { + const result = await fetchMetaData('https://hoppscotch.io/', { + userAgent: 'Rocktim', + fromEmail: 'srocktim61@gmail.com' + }); + console.log(result); +})(); +``` +
+It can also fetch meta-data from shortened URLs. For example:
+```js
+const {fetchMetaData} = require('meta-fetch');
+
+(async () => {
+	const result = await fetchMetaData('https://bit.ly/2Fj9sNF');
+	console.log(result);
+})();
+```
+
+## Options
+You can set these options in the request headers while fetching the data if needed.
+
+| Option | Required | Default Value |
+| :------------- | :----------: | -----------: |
+| `userAgent` | No | `meta-fetch` |
+| `fromEmail` | No | `metafetch@email.com` |
+
+## API
+
+**fetchMetaData(url, options)**
+
+#### url
+Type: `string`
+The URL string that you want to fetch the meta-data from.
+
+#### options
+Type: `object`
+Optional request-header parameters (`userAgent`, `fromEmail`) you can set if needed.
+
+## Contribute
+For any new feature request or bug report, please open an issue or pull request on GitHub.
+
+## License
+MIT © [Rocktim Saikia](https://rocktim.xyz)
\ No newline at end of file
diff --git a/src/index.ts b/src/index.ts
new file mode 100644
index 0000000..72e39dc
--- /dev/null
+++ b/src/index.ts
@@ -0,0 +1,133 @@
+'use strict';
+import fetch from 'node-fetch';
+import * as cheerio from 'cheerio';
+import {createValidUri} from './util';
+
+/**
+ * Fetches a page and extracts the meta-data found in its `<head>`:
+ * basic info (final url, title, description), Open Graph tags,
+ * Twitter card tags and favicon urls.
+ *
+ * Any error thrown while fetching or parsing is logged with
+ * `console.error` and the promise then resolves to `undefined`.
+ * @param url | the website url to fetch the metadata from
+ * @param _options | optional `userAgent` / `fromEmail` values sent as the `User-Agent` and `From` request headers
+ */
+export const fetchMetaData = async (url: string, _options?: Options): Promise<Record<string, any>> => {
+	try {
+		const urlString: string = url.trim();
+
+		// Options validation
+		if (!_options || typeof _options !== 'object') {
+			_options = {};
+		}
+
+		const options = {
+			userAgent: _options.userAgent ?? 'meta-fetch',
+			fromEmail: _options.fromEmail ?? 'metafetch@email.com'
+		};
+
+		const requestOptions = {
+			method: 'GET',
+			headers: {
+				'User-Agent': options.userAgent,
+				'From': options.fromEmail
+			}
+		};
+
+		const response = await fetch(urlString, requestOptions);
+		const content = await response.text();
+
+		// Load html to cheerio
+		const $ = cheerio.load(content);
+		const head = $('head');
+
+		// Basic site meta-data
+		const basicMeta = (): Record<string, string> => {
+			// `response.url` is the final url after redirects (e.g. shortened links)
+			const website = response.url;
+			const title = head.find('title').text();
+			const desc = head.find('meta[name=description]').attr('content');
+
+			return {
+				website,
+				title,
+				description: desc
+			};
+		};
+
+		// Open graph basic
+		const fetchMeta = (): Record<string, string> => {
+			const openGraphsArray = head.find('meta[property]');
+			const openGraphs: Record<string, string> = {};
+			openGraphsArray.each((_, element) => {
+				const property = $(element)
+					.attr('property');
+				const content = $(element).attr('content');
+				if (!property.includes('twitter')) {
+					openGraphs[property] = content;
+				}
+			});
+
+			return openGraphs;
+		};
+
+		// Open graph social
+		const fetchMetaSocial = (): Record<string, string> => {
+			const openGraphsArray = head.find('meta[name]');
+			const socials: Record<string, string> = {};
+
+			openGraphsArray.each((_, element) => {
+				const property = $(element).attr('name');
+				const content = $(element).attr('content');
+
+				if (property.includes('twitter')) {
+					socials[property] = content;
+				}
+			});
+			return socials;
+		};
+
+		// Favicons
+		const fetchFavicons = (): string[] => {
+			const faviconArray = head.find('link[rel]');
+			const favicons: string[] = [];
+
+			faviconArray.each((_, element) => {
+				const rel = ($(element).attr('rel') ?? '').toLowerCase();
+				const href = $(element).attr('href');
+
+				// The link type lives in `rel` ("icon", "shortcut icon", "apple-touch-icon", ...), not in the url;
+				// links without an `href` are skipped instead of throwing.
+				if (href && (rel.includes('icon') || rel.includes('apple-touch-startup-image'))) {
+					const validUri = createValidUri(response.url, href);
+					favicons.push(validUri);
+				}
+			});
+			return favicons;
+		};
+
+		// Meta-data
+		const basicMetaData = basicMeta();
+		const openGraphs = fetchMeta();
+		const openGraph_social = fetchMetaSocial();
+		const favicons = fetchFavicons();
+
+		const metaData: Record<string, any> = {
+			basic_metadata: basicMetaData,
+			opengraph: openGraphs,
+			opengraph_social: openGraph_social,
+			favicons
+		};
+
+		return metaData;
+	} catch (error) {
+		console.error(error);
+	}
+};
+
+// Options interface
+interface Options{
+	userAgent?: string;
+	fromEmail?: string;
+}
diff --git a/src/util.ts b/src/util.ts
new file mode 100644
index 0000000..8da10eb
--- /dev/null
+++ b/src/util.ts
@@ -0,0 +1,17 @@
+'use strict';
+import {URL} from 'url';
+
+/**
+ * Resolves a favicon path against the page url and returns an absolute uri.
+ * Relative, root-relative, protocol-relative and already absolute paths are all handled.
+ * @param host | url of the page the path was found on
+ * @param path | favicon path taken from the `href` attribute
+ */
+export const createValidUri = (host: string, path: string): string => {
+	try {
+		return new URL(path, host).href;
+	} catch {
+		// `host` is not a usable base url: hand the path back untouched
+		return path;
+	}
+};
diff --git a/tsconfig.json b/tsconfig.json
new file mode 100644
index 0000000..b03eaa6
--- /dev/null
+++ b/tsconfig.json
@@ -0,0 +1,17 @@
+{
+	"compilerOptions": {
+		"outDir": "dist",
+		"target": "es2018",
+		"sourceMap": false,
+		"incremental": false,
+		"skipLibCheck": true,
+		"declaration": true,
+		"esModuleInterop": true,
+		"lib": [
+			"es2018"
+		]
+	},
+	"include": [
+		"src/**/*"
+	]
+}
\ No newline at end of file