Skip to content

Commit

Permalink
Merge pull request #26 from sean1832/0.3
Browse files Browse the repository at this point in the history
Improved cookie handling flexibility
  • Loading branch information
sean1832 authored Dec 14, 2024
2 parents 0c98a0c + f96c55a commit 968dffc
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 11 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,15 @@ After obtaining cookies, you can use them to scrape private boards and pins.
```python
import json

from pinterest_dl import PinterestDL

# Load cookies from a file
with open("cookies.json", "r") as f:
cookies = json.load(f)

# Initialize and run the Pinterest image downloader with specified settings
images = (
PinterestDL.with_api()
.with_cookies(
"cookies.json", # Path to cookies file
cookies, # cookies in selenium format
)
.scrape_and_download(
url="https://www.pinterest.com/pin/1234567", # Assume this is a private board URL
Expand Down
2 changes: 1 addition & 1 deletion pinterest_dl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.3.1"
__version__ = "0.3.2"
__description__ = "An unofficial Pinterest image downloader"

from typing import Literal
Expand Down
8 changes: 4 additions & 4 deletions pinterest_dl/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,12 @@ def main() -> None:
elif args.cmd == "scrape":
if args.client in ["chrome", "firefox"]:
PinterestDL.with_browser(
browser_type=args.client,
browser_type=args.client, # type: ignore
timeout=args.timeout,
headless=not args.headful,
incognito=args.incognito,
verbose=args.verbose,
).with_cookies(args.cookies).scrape_and_download(
).with_cookies_path(args.cookies).scrape_and_download(
args.url,
args.output,
args.limit,
Expand All @@ -143,7 +143,7 @@ def main() -> None:
"Warning: Incognito and headful mode is only available for Chrome/Firefox."
)

PinterestDL.with_api(timeout=args.timeout, verbose=args.verbose).with_cookies(
PinterestDL.with_api(timeout=args.timeout, verbose=args.verbose).with_cookies_path(
args.cookies
).scrape_and_download(
args.url,
Expand All @@ -165,7 +165,7 @@ def main() -> None:
"Warning: Incognito and headful mode is only available for Chrome/Firefox."
)

PinterestDL.with_api(timeout=args.timeout, verbose=args.verbose).with_cookies(
PinterestDL.with_api(timeout=args.timeout, verbose=args.verbose).with_cookies_path(
args.cookies
).search_and_download(
args.query,
Expand Down
21 changes: 19 additions & 2 deletions pinterest_dl/scrapers/scraper_api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import time
from pathlib import Path
from typing import List, Optional, Tuple, Union
from typing import List, Optional, Tuple, Union, Any

from tqdm import tqdm

Expand Down Expand Up @@ -28,7 +28,24 @@ def __init__(self, timeout: float = 5, verbose: bool = False) -> None:
self.verbose = verbose
self.cookies = None

def with_cookies(self, cookies_path: Optional[Union[str, Path]]) -> "_ScraperAPI":
def with_cookies(self, cookies: list[dict[str, Any]]) -> "_ScraperAPI":
    """Load already-parsed cookies into the current session.

    Args:
        cookies (list[dict]): List of cookies in Selenium format.

    Returns:
        _ScraperAPI: This instance, with ``self.cookies`` populated, so calls
            can be chained.

    Raises:
        ValueError: If ``cookies`` is not a list. When a ``str`` or ``Path``
            is passed, the message also points to ``with_cookies_path``.
    """
    invalid_format = "Invalid cookies format. Expected a list of dictionary. In Selenium format."
    if isinstance(cookies, (str, Path)):
        # A path-like argument almost certainly means the caller wanted the
        # file-loading variant, so say so explicitly. The separating space
        # keeps the two sentences from running together in the message.
        raise ValueError(
            f"{invalid_format} "
            "If you want to load cookies from a file, use `with_cookies_path` method instead."
        )
    if not isinstance(cookies, list):
        raise ValueError(invalid_format)
    self.cookies = PinterestCookieJar().from_selenium_cookies(cookies)
    return self

def with_cookies_path(self, cookies_path: Optional[Union[str, Path]]) -> "_ScraperAPI":
"""Load cookies from a file to the current session.
Args:
Expand Down
27 changes: 24 additions & 3 deletions pinterest_dl/scrapers/scraper_webdriver.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import time
from pathlib import Path
from typing import List, Literal, Optional, Tuple, Union
from typing import Any, List, Literal, Optional, Tuple, Union

from selenium.webdriver.remote.webdriver import WebDriver

Expand All @@ -17,7 +17,28 @@ def __init__(self, webdriver: WebDriver, timeout: float = 3, verbose: bool = Fal
self.verbose = verbose
self.webdriver: WebDriver = webdriver

def with_cookies(
def with_cookies(self, cookies: list[dict[str, Any]], wait_sec: float = 1) -> "_ScraperWebdriver":
    """Load already-parsed cookies into the current browser session.

    Args:
        cookies (list[dict]): List of cookies to load, in Selenium format.
        wait_sec (float): Time in seconds to wait after loading cookies.

    Returns:
        _ScraperWebdriver: This instance with the cookies added to its
            webdriver, so calls can be chained.

    Raises:
        ValueError: If ``cookies`` is not a list. When a ``str`` or ``Path``
            is passed, the message also points to ``with_cookies_path``.
    """
    invalid_format = "Invalid cookies format. Expected a list of dictionary. In Selenium format."
    if isinstance(cookies, (str, Path)):
        # A path-like argument almost certainly means the caller wanted the
        # file-loading variant, so say so explicitly. The separating space
        # keeps the two sentences from running together in the message.
        raise ValueError(
            f"{invalid_format} "
            "If you want to load cookies from a file, use `with_cookies_path` method instead."
        )
    if not isinstance(cookies, list):
        raise ValueError(invalid_format)

    # Selenium requires the page to be loaded before adding cookies
    # (same preparation step the file-based loader performs).
    self.webdriver.get("https://www.pinterest.com")

    sanitized = self._sanitize_cookies(cookies)
    for cookie in sanitized:
        self.webdriver.add_cookie(cookie)
    # Pause after the cookies are added before handing control back.
    time.sleep(wait_sec)
    return self

def with_cookies_path(
self, cookies_path: Optional[Union[str, Path]], wait_sec: float = 1
) -> "_ScraperWebdriver":
"""Load cookies from a file to the current browser session.
Expand Down Expand Up @@ -46,7 +67,7 @@ def with_cookies(
# Selenium requires the page to be loaded before adding cookies
self.webdriver.get("https://www.pinterest.com")

cookies = _ScraperWebdriver._sanitize_cookies(cookies)
cookies = self._sanitize_cookies(cookies)
for cookie in cookies:
self.webdriver.add_cookie(cookie)
print(f"Loaded cookies from {cookies_path}")
Expand Down

0 comments on commit 968dffc

Please sign in to comment.