|
1 |
| -from serpapi.serp_api_client_exception import SerpApiClientException |
| 1 | +from urllib import parse |
| 2 | +from serpapi import constant |
2 | 3 |
|
3 |
| -DEFAULT_START = 0 |
4 |
| -DEFAULT_END = 1000000000 |
5 |
| -DEFAULT_num = 10 |
6 |
| - |
7 |
| -# Paginate response in SearpApi |
| 4 | +# Paginate response in SerpApi |
8 | 5 | class Pagination:
|
9 |
| - |
10 |
| - def __init__(self, client, start = DEFAULT_START, end = DEFAULT_END, num = DEFAULT_num): |
11 |
| - # serp api client |
| 6 | + |
| 7 | + def __init__(self, client, start = constant.DEFAULT_START, end = constant.DEFAULT_END, num = constant.DEFAULT_PAGE_SIZE, limit = constant.DEFAULT_LIMIT): |
| 8 | + # SerpApi client |
12 | 9 | self.client = client
|
13 |
| - # range |
14 |
| - self.start = start |
15 |
| - self.end = end |
16 |
| - self.num = num |
17 |
| - |
18 |
| - # use value from the client |
19 |
| - if self.start == DEFAULT_START: |
20 |
| - if 'start' in self.client.params_dict: |
21 |
| - self.start = self.client.params_dict['start'] |
22 |
| - if self.end == DEFAULT_END: |
23 |
| - if 'end' in self.client.params_dict: |
24 |
| - self.end = self.client.params_dict['end'] |
25 |
| - if self.num == DEFAULT_num: |
26 |
| - if 'num' in self.client.params_dict: |
27 |
| - self.num = self.client.params_dict['num'] |
28 |
| - |
29 |
| - # basic check |
30 |
| - if self.start > self.end: |
31 |
| - raise SerpApiClientException("start: {} must be less than end: {}".format(self.start, self.end)) |
32 |
| - if(self.start + self.num) > self.end: |
33 |
| - raise SerpApiClientException("start + num: {} + {} must be less than end: {}".format(self.start, self.num, self.end)) |
| 10 | + |
| 11 | + self.limit = limit |
| 12 | + |
| 13 | + """Backwards-compatible workaround. |
| 14 | + `start`, `num`, and `end` parameters to `Pagination#__init__` are deprecated. |
| 15 | +
|
| 16 | + Non-default values are copied into the `start`, `end`, and `num` search parameters. |
| 17 | + This works for the Google Search API only. |
| 18 | + The correct way to set the offset, limit, and page size is directly in the search parameters |
| 19 | + (the dict that is passed to `SerpApi#__init__`). |
| 20 | + """ |
| 21 | + if start != constant.DEFAULT_START: |
| 22 | + self.client.params_dict['start'] = start |
| 23 | + |
| 24 | + if end != constant.DEFAULT_END: |
| 25 | + self.client.params_dict['end'] = end |
| 26 | + |
| 27 | + if num != constant.DEFAULT_PAGE_SIZE: |
| 28 | + self.client.params_dict['num'] = num |
| 29 | + |
| 30 | + |
| 31 | + self.page_number = 0 |
34 | 32 |
|
35 | 33 | def __iter__(self):
|
36 |
| - self.update() |
37 | 34 | return self
|
38 | 35 |
|
39 |
| - def update(self): |
40 |
| - self.client.params_dict['start'] = self.start |
41 |
| - self.client.params_dict['num'] = self.num |
42 |
| - if self.start > 0: |
43 |
| - self.client.params_dict['start'] += 1 |
44 |
| - |
45 | 36 | def __next__(self):
|
46 |
| - # update parameter |
47 |
| - self.update() |
| 37 | + if self.page_number >= self.limit: |
| 38 | + raise StopIteration |
48 | 39 |
|
49 | 40 | # execute request
|
50 | 41 | result = self.client.get_dict()
|
51 | 42 |
|
52 |
| - # stop if backend miss to return serpapi_pagination |
53 |
| - if not 'serpapi_pagination' in result: |
| 43 | + pagination = result.get('serpapi_pagination', result.get('pagination')) |
| 44 | + |
| 45 | + # stop if the backend fails to return `serpapi_pagination` or `pagination` |
| 46 | + if not pagination: |
54 | 47 | raise StopIteration
|
55 | 48 |
|
56 | 49 | # stop if no next page
|
57 |
| - if not 'next' in result['serpapi_pagination']: |
58 |
| - raise StopIteration |
59 |
| - |
60 |
| - # ends if no next page |
61 |
| - if self.start + self.num > self.end: |
62 |
| - raise StopIteration |
63 |
| - |
64 |
| - # increment start page |
65 |
| - self.start += self.num |
| 50 | + if not 'next' in pagination: |
| 51 | + raise StopIteration |
| 52 | + |
| 53 | + # Get the actual parameters from the next-page URL of the target website |
| 54 | + params_from_target_website = dict( |
| 55 | + parse.parse_qsl(parse.urlsplit(pagination['next']).query) |
| 56 | + ) |
| 57 | + |
| 58 | + # stop if parameters from the target website were not changed |
| 59 | + if params_from_target_website.items() <= self.client.params_dict.items(): |
| 60 | + raise StopIteration |
| 61 | + |
| 62 | + self.client.params_dict.update(params_from_target_website) |
| 63 | + |
| 64 | + self.page_number += 1 |
66 | 65 |
|
67 | 66 | return result
|
0 commit comments