-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwiki_projects.py
executable file
·145 lines (118 loc) · 5.44 KB
/
wiki_projects.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#! /usr/bin/env python
# Copyright 2013 Mdgilbert
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re
import wiki_categories
import uw_settings
from uw_db import uw_db
# Shared handle to the project's uw_db helper; the classes in this module
# obtain their cursors and run their queries through it.
db = uw_db()
class wikiProj(object):
    """Class for fetching project data from the toolserver"""

    # Cursor opened by the most recent getProjects() call (closed again
    # before that call returns).
    cursor = None
    # Name of the database the project queries are run against.
    wikiDB = "enwiki_p"

    def __init__(self):
        """
        Initialize the object.

        Nothing is set up here; a cursor is requested from the module-level
        db helper only when a query method is called.
        """
        pass

    ## Query to get all WikiProjects (seems to work fairly well, returned 2083 rows in 1m46s)
    ## select page_id, page_title from page inner join category on page_title = cat_title where page_namespace = 4 and page_title like "WikiProject\_%";
    ## Query to get other "Active WikiProject" pages potentially missed by the previous query
    ## select page_id, page_title from page inner join categorylinks on page_id = cl_from where cl_to = "Active_WikiProjects" and page_namespace = 4 and page_title NOT LIKE "WikiProject_%";

    def getProjects(self, filter = None):
        """
        Fetches project information from the Toolserver DB.

        Two result sets are combined with UNION: namespace-4 pages whose
        title matches "WikiProject\\_%" and equals a category title, and
        namespace-4 pages linked to the "Active_WikiProjects" category whose
        title does not match "WikiProject_%".

        Input:
            filter - optional text; when given, an additional
                     "page_title LIKE" condition containing this text is
                     appended to both halves of the query.
        Returns:
            The rows returned by the cursor's fetchall(); each row selects
            (<page id>, <project title>), ordered by page id ascending.
        """
        self.cursor = db.getCursorForDB(self.wikiDB, thread = "p1")
        # NOTE(review): filter is concatenated into the SQL fragment without
        # any escaping, so only trusted values should be passed here. How the
        # fragment is finally merged into the query depends on uw_db.execute,
        # which is not visible from this file - confirm before changing.
        f = " AND page_title LIKE '%%" + filter + "%%' " if filter else ""
        # "\\_" yields the same backslash-underscore the original literal
        # produced, without relying on an invalid escape sequence.
        query = (
            '(SELECT page.page_id, page.page_title '
            'FROM %s.page INNER JOIN %s.category '
            'ON page.page_title = category.cat_title '
            'WHERE page_namespace = 4 '
            'AND page_title LIKE "WikiProject\\_%%" %s '
            'GROUP BY page_id) '
            'UNION '
            '(SELECT page.page_id, page.page_title '
            'FROM %s.page INNER JOIN %s.categorylinks '
            'ON page.page_id = categorylinks.cl_from '
            'WHERE categorylinks.cl_to = "Active_WikiProjects" '
            'AND page.page_namespace = 4 '
            'AND page.page_title NOT LIKE "WikiProject_%%" %s '
            'GROUP BY page_id) '
            'ORDER BY page_id ASC'
        )
        try:
            db.execute(self.cursor, query, (self.wikiDB, self.wikiDB, f, self.wikiDB, self.wikiDB, f))
            rows = self.cursor.fetchall()
        finally:
            # Release the cursor even when the query or the fetch fails.
            self.cursor.close()
        return rows

    def getProjectPages(self, project, callback = None, id = None, thread = None, includeTitle = None):
        """
        Fetches all the pages that are in a given project.

        This is a thin wrapper: a new wiki_categories.wikiCat() is created
        and its getPagesInCategory() is called with the project as the
        category; callback, id, thread and includeTitle are passed through
        unchanged, and whatever that call returns is returned.
        """
        return wiki_categories.wikiCat().getPagesInCategory(project, callback = callback, id = id, thread = thread, includeTitle = includeTitle)

    def testPrintProject(self, project):
        """
        Test function to print project information from Toolserver DB.

        Currently a placeholder: it ignores project and does nothing.
        """
        pass
class localProj(object):
    """Class for fetching and/or updating project data on our local server"""

    # Cursor opened by the most recent method call (closed again before that
    # call returns).
    cursor = None
    # Name of the local database holding the n_project tables.
    localDB = "reflex_relations"

    def __init__(self):
        """
        Initialize the object.

        Nothing is set up here; each method requests its own cursor from the
        module-level db helper.
        """
        pass

    def getProjects(self, filter = None):
        """
        Fetches project data from the UW DB.

        Input:
            filter - optional text; when given, only projects whose p_title
                     contains it are returned. The text is bound as a LIKE
                     pattern, so "%" and "_" inside it act as wildcards.
        Returns:
            The rows returned by the cursor's fetchall(); each row selects
            (<project id>, <project title>), ordered by title ascending.
        """
        query = 'SELECT p_id, p_title FROM n_project'
        params = None
        if filter:
            # Bind the pattern as a query parameter (the same placeholder
            # style insertProjects uses) instead of splicing it into the SQL.
            query += ' WHERE p_title LIKE %s'
            params = ['%' + filter + '%']
        query += ' ORDER BY p_title ASC'

        self.cursor = db.getCursorForDB(self.localDB, thread = "p1")
        try:
            if params is None:
                db.execute(self.cursor, query)
            else:
                db.execute(self.cursor, query, params)
            rows = self.cursor.fetchall()
        finally:
            # Release the cursor even when the query or the fetch fails.
            self.cursor.close()
        return rows

    def clearProjects(self):
        """
        Clears out all project data from the local DB.

        Deletes every row from n_project and then from n_project_pages.
        """
        self.cursor = db.getCursorForDB(self.localDB, thread = "p1")
        try:
            db.execute(self.cursor, 'DELETE FROM n_project')
            db.execute(self.cursor, 'DELETE FROM n_project_pages')
        finally:
            self.cursor.close()

    def insertProjects(self, projects):
        """
        Populates the project table with a single multi-row INSERT.

        Input:
            projects - an iterable of projects of the form
                       ((<project id>, <project title>), ). The id is
                       converted with str() before being bound.
        Returns:
            None. When projects is empty nothing is executed and no cursor
            is opened, because an INSERT without any value tuples is not
            valid SQL.
        """
        values = []
        placeholders = []
        for p in projects:
            values.extend((str(p[0]), p[1]))
            placeholders.append("(%s,%s)")
        if not placeholders:
            return

        query = 'INSERT INTO n_project (p_id, p_title) VALUES ' + ','.join(placeholders)
        self.cursor = db.getCursorForDB(self.localDB, thread = "p1")
        try:
            db.execute(self.cursor, query, values)
        finally:
            self.cursor.close()
if __name__ == "__main__":
    # Earlier manual checks, kept for reference (none of them run):
    #
    #   print "Updating project list in local database from toolserver"
    #   localProj().updateProjects()
    #   localProj().testPrintProject("WikiProject_ACC")
    #
    #   pages = wikiProj().getProjectPages("WikiProject_Zoo");
    #   print pages
    #   print "Total pages found: " + str( len(pages) )
    #
    #   projects = wikiProj().getProjects()
    #   print projects
    #   print "Total projects found: " + str( len(projects) )

    # NOTE(review): localProj as defined in this file has no updateProjects()
    # method, so this call looks like it will raise AttributeError - confirm
    # whether the method was removed or still needs to be written.
    localProj().updateProjects()