@@ -27,6 +27,8 @@
     choices=ALL_SCRAPERS.keys(),
     help="Specify one or more scrapers to use",
 )
+parser.add_argument("--no-fetch", action="store_true", help="Skip fetching data")
+parser.add_argument("--no-parse", action="store_true", help="Skip parsing data")
 args = parser.parse_args()
 
 # Ensure DB has the latest structure
@@ -40,20 +42,22 @@
 else:
     scrapers_to_use = sorted(set(args.scrapers))
 
-# Fetch data
-for scraper_name in scrapers_to_use:
-    scraper = ALL_SCRAPERS[scraper_name]
-    scraper.fetch_data()
+# Fetch data if not skipped
+if not args.no_fetch:
+    for scraper_name in scrapers_to_use:
+        scraper = ALL_SCRAPERS[scraper_name]
+        scraper.fetch_data()
 
 # Ensure tags and categories are created
 ensure_categories_created()
 ensure_tags_created()
 
-# Parse the data
-for scraper_name in scrapers_to_use:
-    scraper = ALL_SCRAPERS[scraper_name]
-    for path in scraper.find_files():
-        parse_data(path, scraper)
+# Parse data if not skipped
+if not args.no_parse:
+    for scraper_name in scrapers_to_use:
+        scraper = ALL_SCRAPERS[scraper_name]
+        for path in scraper.find_files():
+            parse_data(path, scraper)
 
 if args.serve:
     from streamlit.web.cli import main_run
|