Skip to content

Commit 03e23f8

Browse files
committed May 26, 2020
Node爬虫练习
1 parent d95dd60 commit 03e23f8

9 files changed

+95
-113
lines changed
 

‎README.md

+1
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
- Lesson8 - [Node操作数据库](/lesson8)
1212
- Lesson9 - [Koa快速入门教程](/lesson9)
1313
- Lesson10 - [Node.js使用Nodemailer发送邮件](/lesson10)
14+
- Lesson11 - [Node爬虫练习](/lesson11)
1415

1516
## Node相关入门资料
1617

‎avatar.js ‎lesson11/avatar.js

+12-12
Original file line number | Diff line number | Diff line change
@@ -16,26 +16,26 @@ var options = {
1616
url: url,
1717
headers: {
1818
authorization:
19-
"Bearer Mi4xRGViWEF3QUFBQUFBSU1KQjNudUNEQmNBQUFCaEFsVk4xU1lGV2dERnlQQzRVYXVQNVNqSmZIUnl1WWY1RkFSczJR|1507695061|f575db5eb9910d90c54f8e92ede7bb0fcfe795e0"
20-
}
19+
"Bearer Mi4xRGViWEF3QUFBQUFBSU1KQjNudUNEQmNBQUFCaEFsVk4xU1lGV2dERnlQQzRVYXVQNVNqSmZIUnl1WWY1RkFSczJR|1507695061|f575db5eb9910d90c54f8e92ede7bb0fcfe795e0",
20+
},
2121
};
2222
var users = [];
2323

2424
// 获取大量的用户数据
2525
function getDataList(url) {
2626
options.url = url;
27-
request.get(options, function(error, response, body) {
27+
request.get(options, function (error, response, body) {
2828
if (!error && response.statusCode == 200) {
2929
var response = JSON.parse(response.body);
3030
var zhList = response.data;
31-
zhList.forEach(function(item) {
31+
zhList.forEach(function (item) {
3232
//item.gender == 0 性别判断
3333
if (item.gender == 0) {
3434
console.log(`正在抓取${item.avatar_url}`);
3535
users.push({
3636
name: item.name,
3737
img: item.avatar_url.replace("_is", ""),
38-
url_token: item.url_token
38+
url_token: item.url_token,
3939
});
4040
}
4141
});
@@ -65,7 +65,7 @@ function getDataList(url) {
6565

6666
// 把数据下载保存到data.js
6767
function downLoadContent(cont) {
68-
fs.appendFile("./" + "data.js", "module.exports =" + cont, "utf-8", function(
68+
fs.appendFile("./" + "data.js", "module.exports =" + cont, "utf-8", function (
6969
err
7070
) {
7171
if (err) {
@@ -81,17 +81,17 @@ var eyeUrl = "http://api.eyekey.com/face/Check/checking";
8181
var config = {
8282
app_id: "f89ae61fd63d4a63842277e9144a6bd2",
8383
app_key: "af1cd33549c54b27ae24aeb041865da2",
84-
url: "https://pic4.zhimg.com/43fda2d268bd17c561ab94d3cb8c80eb.jpg"
84+
url: "https://pic4.zhimg.com/43fda2d268bd17c561ab94d3cb8c80eb.jpg",
8585
};
8686

8787
function face(item) {
8888
config.url = item.img;
8989
request.post(
9090
{
9191
url: eyeUrl,
92-
form: config
92+
form: config,
9393
},
94-
function(error, response, body) {
94+
function (error, response, body) {
9595
if (!error && response.statusCode == 200) {
9696
var data = JSON.parse(body);
9797
try {
@@ -110,7 +110,7 @@ function face(item) {
110110

111111
// 下载图片方法
112112
function downLoadImg(image) {
113-
request.head(image.img, function(err, res, body) {
113+
request.head(image.img, function (err, res, body) {
114114
if (err) {
115115
console.log(err);
116116
}
@@ -133,11 +133,11 @@ function startDownLoad(imgdata) {
133133
async.eachLimit(
134134
imgdata,
135135
3,
136-
function(item, callback) {
136+
function (item, callback) {
137137
face(item);
138138
callback();
139139
},
140-
function(err) {
140+
function (err) {
141141
if (err) {
142142
console.log(err);
143143
} else {

‎boatTicket.js ‎lesson11/boatTicket.js

+12-15
Original file line number | Diff line number | Diff line change
@@ -24,15 +24,15 @@ function get() {
2424

2525
async.eachSeries(
2626
url_list,
27-
function(arr_url, callback) {
27+
function (arr_url, callback) {
2828
console.log("正在抓取" + arr_url + "的数据...");
29-
request(arr_url, function(err, data) {
29+
request(arr_url, function (err, data) {
3030
if (err) {
3131
return console.error(err);
3232
}
3333
// 通过cheerio的load方法解析整个文档,就是html页面所有内容,可以通过console.log($.html());在控制台查看
3434
var $ = cheerio.load(data.body.toString());
35-
$(".tptx_ytgt .tptx_ytgt_4").each(function() {
35+
$(".tptx_ytgt .tptx_ytgt_4").each(function () {
3636
var $me = $(this);
3737
//解析船公司和船字段
3838
var arr1 = analyStr($me.find(".tptx_ytgt_2b a").text());
@@ -45,32 +45,29 @@ function get() {
4545
$me.find(".tptx_jcyj_2ab_1 ul li:first-child").text()
4646
), // 航线
4747
txtStartDate: analyStart(
48-
$me
49-
.find(".tptx_jcyj_2ab_1 li")
50-
.eq(1)
51-
.text(),
48+
$me.find(".tptx_jcyj_2ab_1 li").eq(1).text(),
5249
$me.find(".tptx_jcyj_2ab_1 span").text()
5350
),
5451
numDay: Number(arr2[1]), // 天数
5552
numNight: Number(arr2[0]), //夜数
5653
numPrice: analyPrice(
5754
$me.find(".tptx_jcyj_2ac .tptx_jcyj_2ac_1").text()
5855
), // 价格
59-
txtUrl: domain + $me.find(".tptx_ytgt_2b a").attr("href") //详情url
56+
txtUrl: domain + $me.find(".tptx_ytgt_2b a").attr("href"), //详情url
6057
};
6158
list.push(item);
6259
});
6360
callback(err, list);
6461
});
6562
},
66-
function(err) {
63+
function (err) {
6764
if (err) {
6865
return console.error(err.stack);
6966
}
7067
/*写入数据库*/
7168
async.eachSeries(
7269
list,
73-
function(record, callback) {
70+
function (record, callback) {
7471
console.log("正在写入" + record.txtStartDate + "的数据...");
7572
var str = "";
7673
for (var i in record) {
@@ -85,18 +82,18 @@ function get() {
8582
")";
8683
dbUtil
8784
.EXECUTE(sql)
88-
.then(res => {
85+
.then((res) => {
8986
console.log("res==>", res);
9087
// 受影响的行数
9188
if (res.affectedRows == 1) {
9289
callback(err);
9390
}
9491
})
95-
.catch(data => {
92+
.catch((data) => {
9693
console.log("data==>", data);
9794
});
9895
},
99-
function(err) {
96+
function (err) {
10097
if (err) {
10198
return console.error(err.stack);
10299
}
@@ -175,12 +172,12 @@ get();
175172

176173
/*处理乱码*/
177174
function reconvert(str) {
178-
str = str.replace(/(\\u)(\w{4})/gi, function($0) {
175+
str = str.replace(/(\\u)(\w{4})/gi, function ($0) {
179176
return String.fromCharCode(
180177
parseInt(escape($0).replace(/(%5Cu)(\w{4})/g, "$2"), 16)
181178
);
182179
});
183-
str = str.replace(/(&#x)(\w{4});/gi, function($0) {
180+
str = str.replace(/(&#x)(\w{4});/gi, function ($0) {
184181
return String.fromCharCode(
185182
parseInt(escape($0).replace(/(%26%23x)(\w{4})(%3B)/g, "$2"), 16)
186183
);

‎cnblog.js ‎lesson11/cnblog.js

+10-15
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ for (var i = 1; i <= pageNum; i++) {
3030
// 抓取昵称、入园年龄、粉丝数、关注数
3131
function personInfo(url) {
3232
var infoArray = {};
33-
superagent.get(url).end(function(err, ares) {
33+
superagent.get(url).end(function (err, ares) {
3434
if (err) {
3535
console.log(err);
3636
return;
@@ -45,12 +45,7 @@ function personInfo(url) {
4545

4646
// 小概率异常抛错
4747
try {
48-
age =
49-
"20" +
50-
info
51-
.eq(1)
52-
.attr("title")
53-
.split("20")[1];
48+
age = "20" + info.eq(1).attr("title").split("20")[1];
5449
} catch (err) {
5550
console.log(err);
5651
age = "2012-11-06";
@@ -90,7 +85,7 @@ function start() {
9085
// 设置字符编码(去掉中文会乱码)
9186
res.writeHead(200, { "Content-Type": "text/html;charset=utf-8" });
9287
// 当所有 'BlogArticleHtml' 事件完成后的回调触发下面事件
93-
ep.after("BlogArticleHtml", pageUrls.length * 20, function(articleUrls) {
88+
ep.after("BlogArticleHtml", pageUrls.length * 20, function (articleUrls) {
9489
// 获取 BlogPageUrl 页面内所有文章链接
9590
for (var i = 0; i < articleUrls.length; i++) {
9691
res.write(articleUrls[i] + "<br/>");
@@ -104,7 +99,7 @@ function start() {
10499

105100
//控制并发数
106101
var curCount = 0;
107-
var reptileMove = function(url, callback) {
102+
var reptileMove = function (url, callback) {
108103
//延迟毫秒数
109104
var delay = parseInt((Math.random() * 30000000) % 1000, 10);
110105
curCount++;
@@ -116,7 +111,7 @@ function start() {
116111
",耗时" + delay + "毫秒"
117112
);
118113

119-
superagent.get(url).end(function(err, sres) {
114+
superagent.get(url).end(function (err, sres) {
120115
// 常规的错误处理
121116
if (err) {
122117
console.log(err);
@@ -159,7 +154,7 @@ function start() {
159154
}
160155
});
161156

162-
setTimeout(function() {
157+
setTimeout(function () {
163158
curCount--;
164159
callback(null, url + "Call back content");
165160
}, delay);
@@ -171,10 +166,10 @@ function start() {
171166
async.mapLimit(
172167
articleUrls,
173168
5,
174-
function(url, callback) {
169+
function (url, callback) {
175170
reptileMove(url, callback);
176171
},
177-
function(err, result) {
172+
function (err, result) {
178173
endDate = new Date();
179174

180175
console.log("final:");
@@ -237,8 +232,8 @@ function start() {
237232
});
238233

239234
// 轮询 所有文章列表页
240-
pageUrls.forEach(function(pageUrl) {
241-
superagent.get(pageUrl).end(function(err, pres) {
235+
pageUrls.forEach(function (pageUrl) {
236+
superagent.get(pageUrl).end(function (err, pres) {
242237
console.log("fetch " + pageUrl + " successful");
243238
res.write("fetch " + pageUrl + " successful<br/>");
244239
// 常规的错误处理

‎crawlExample.js ‎lesson11/crawlExample.js

+12-12
Original file line number | Diff line number | Diff line change
@@ -15,26 +15,26 @@ var options = {
1515
url: url,
1616
headers: {
1717
authorization:
18-
"Bearer Mi4xRGViWEF3QUFBQUFBSU1KQjNudUNEQmNBQUFCaEFsVk4xU1lGV2dERnlQQzRVYXVQNVNqSmZIUnl1WWY1RkFSczJR|1507695061|f575db5eb9910d90c54f8e92ede7bb0fcfe795e0"
19-
}
18+
"Bearer Mi4xRGViWEF3QUFBQUFBSU1KQjNudUNEQmNBQUFCaEFsVk4xU1lGV2dERnlQQzRVYXVQNVNqSmZIUnl1WWY1RkFSczJR|1507695061|f575db5eb9910d90c54f8e92ede7bb0fcfe795e0",
19+
},
2020
};
2121
var users = [];
2222

2323
// 获取大量的用户数据
2424
function getDataList(url) {
2525
options.url = url;
26-
request.get(options, function(error, response, body) {
26+
request.get(options, function (error, response, body) {
2727
if (!error && response.statusCode == 200) {
2828
var response = JSON.parse(response.body);
2929
var zhList = response.data;
30-
zhList.forEach(function(item) {
30+
zhList.forEach(function (item) {
3131
//item.gender == 0 性别判断
3232
if (item.gender == 0) {
3333
console.log(`正在抓取${item.avatar_url}`);
3434
users.push({
3535
name: item.name,
3636
img: item.avatar_url.replace("_is", ""),
37-
url_token: item.url_token
37+
url_token: item.url_token,
3838
});
3939
}
4040
});
@@ -64,7 +64,7 @@ function getDataList(url) {
6464

6565
// 把数据下载保存到data.js
6666
function downLoadContent(cont) {
67-
fs.appendFile("./" + "data.js", "module.exports =" + cont, "utf-8", function(
67+
fs.appendFile("./" + "data.js", "module.exports =" + cont, "utf-8", function (
6868
err
6969
) {
7070
if (err) {
@@ -80,17 +80,17 @@ var eyeUrl = "http://api.eyekey.com/face/Check/checking";
8080
var config = {
8181
app_id: "f89ae61fd63d4a63842277e9144a6bd2",
8282
app_key: "af1cd33549c54b27ae24aeb041865da2",
83-
url: "https://pic4.zhimg.com/43fda2d268bd17c561ab94d3cb8c80eb.jpg"
83+
url: "https://pic4.zhimg.com/43fda2d268bd17c561ab94d3cb8c80eb.jpg",
8484
};
8585

8686
function face(item) {
8787
config.url = item.img;
8888
request.post(
8989
{
9090
url: eyeUrl,
91-
form: config
91+
form: config,
9292
},
93-
function(error, response, body) {
93+
function (error, response, body) {
9494
if (!error && response.statusCode == 200) {
9595
var data = JSON.parse(body);
9696
try {
@@ -108,7 +108,7 @@ function face(item) {
108108

109109
// 下载图片方法
110110
function downLoadImg(image) {
111-
request.head(image.img, function(err, res, body) {
111+
request.head(image.img, function (err, res, body) {
112112
if (err) {
113113
console.log(err);
114114
}
@@ -131,11 +131,11 @@ function startDownLoad(imgdata) {
131131
async.eachLimit(
132132
imgdata,
133133
3,
134-
function(item, callback) {
134+
function (item, callback) {
135135
face(item);
136136
callback();
137137
},
138-
function(err) {
138+
function (err) {
139139
if (err) {
140140
console.log(err);
141141
} else {

‎crawler.js ‎lesson11/crawler.js

+7-7
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ var Crawler = require("crawler");
33
var c = new Crawler({
44
maxConnections: 10,
55
// This will be called for each crawled page
6-
callback: function(error, res, done) {
6+
callback: function (error, res, done) {
77
if (error) {
88
console.log(error);
99
} else {
@@ -13,7 +13,7 @@ var c = new Crawler({
1313
console.log($("title").text());
1414
}
1515
done();
16-
}
16+
},
1717
});
1818

1919
// Queue just one URL, with default callback
@@ -29,20 +29,20 @@ c.queue([
2929
jQuery: false,
3030

3131
// The global callback won't be called
32-
callback: function(error, res, done) {
32+
callback: function (error, res, done) {
3333
if (error) {
3434
console.log(error);
3535
} else {
3636
console.log("Grabbed", res.body.length, "bytes");
3737
}
3838
done();
39-
}
40-
}
39+
},
40+
},
4141
]);
4242

4343
// Queue some HTML code directly without grabbing (mostly for tests)
4444
c.queue([
4545
{
46-
html: "<p>This is a <strong>test</strong></p>"
47-
}
46+
html: "<p>This is a <strong>test</strong></p>",
47+
},
4848
]);

‎dbUtil.js ‎lesson11/dbUtil.js

+17-17
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ if (local) {
1111
user: "root",
1212
password: "root",
1313
database: "crawl",
14-
multipleStatements: true //是否允许执行多条sql语句
14+
multipleStatements: true, //是否允许执行多条sql语句
1515
});
1616
} else {
1717
pool = mysql.createPool({
@@ -20,19 +20,19 @@ if (local) {
2020
user: "root",
2121
password: "websoft9",
2222
database: "motion",
23-
multipleStatements: true //是否允许执行多条sql语句
23+
multipleStatements: true, //是否允许执行多条sql语句
2424
});
2525
}
2626

2727
//将结果已对象数组返回
2828
var row = (sql, ...params) => {
29-
return new Promise(function(resolve, reject) {
30-
pool.getConnection(function(err, connection) {
29+
return new Promise(function (resolve, reject) {
30+
pool.getConnection(function (err, connection) {
3131
if (err) {
3232
reject(err);
3333
return;
3434
}
35-
connection.query(sql, params, function(error, res) {
35+
connection.query(sql, params, function (error, res) {
3636
connection.release();
3737
if (error) {
3838
reject(error);
@@ -46,13 +46,13 @@ var row = (sql, ...params) => {
4646

4747
//返回一个对象
4848
var first = (sql, ...params) => {
49-
return new Promise(function(resolve, reject) {
50-
pool.getConnection(function(err, connection) {
49+
return new Promise(function (resolve, reject) {
50+
pool.getConnection(function (err, connection) {
5151
if (err) {
5252
reject(err);
5353
return;
5454
}
55-
connection.query(sql, params, function(error, res) {
55+
connection.query(sql, params, function (error, res) {
5656
connection.release();
5757
if (error) {
5858
reject(error);
@@ -66,13 +66,13 @@ var first = (sql, ...params) => {
6666

6767
//返回单个查询结果
6868
var single = (sql, ...params) => {
69-
return new Promise(function(resolve, reject) {
70-
pool.getConnection(function(err, connection) {
69+
return new Promise(function (resolve, reject) {
70+
pool.getConnection(function (err, connection) {
7171
if (err) {
7272
reject(err);
7373
return;
7474
}
75-
connection.query(sql, params, function(error, res) {
75+
connection.query(sql, params, function (error, res) {
7676
connection.release();
7777
if (error) {
7878
reject(error);
@@ -90,15 +90,15 @@ var single = (sql, ...params) => {
9090

9191
//执行代码,返回执行结果
9292
var execute = (sql, ...params) => {
93-
return new Promise(function(resolve, reject) {
93+
return new Promise(function (resolve, reject) {
9494
// 获取连接
95-
pool.getConnection(function(err, connection) {
95+
pool.getConnection(function (err, connection) {
9696
if (err) {
9797
reject(err);
9898
return;
9999
}
100100
// 操作数据库
101-
connection.query(sql, params, function(error, res) {
101+
connection.query(sql, params, function (error, res) {
102102
// 释放
103103
connection.release();
104104
if (error) {
@@ -116,7 +116,7 @@ module.exports = {
116116
ROW: row,
117117
FIRST: first,
118118
SINGLE: single,
119-
EXECUTE: execute
119+
EXECUTE: execute,
120120
};
121121

122122
/*连接mysql*/
@@ -125,11 +125,11 @@ function connectToMysql() {
125125
host: "182.254.153.189",
126126
user: "root",
127127
password: "websoft9",
128-
database: "motion"
128+
database: "motion",
129129
});
130130
connection.connect();
131131
//查询
132-
connection.query("SELECT * FROM user;", function(err, rows, fields) {
132+
connection.query("SELECT * FROM user;", function (err, rows, fields) {
133133
if (err) throw err;
134134
console.log("The solution is: ", rows[0]);
135135
});

‎example.js ‎lesson11/example.js

+8-8
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ let index = 4;
1616
switch (index) {
1717
// 发送http请求
1818
case 0:
19-
request("http://www.baidu.com", function(error, response, body) {
19+
request("http://www.baidu.com", function (error, response, body) {
2020
if (!error && response.statusCode == 200) {
2121
console.log(body); // response.body
2222
}
@@ -25,7 +25,7 @@ switch (index) {
2525

2626
// cheerio解析dom
2727
case 1:
28-
request("http://www.baidu.com", function(error, response, body) {
28+
request("http://www.baidu.com", function (error, response, body) {
2929
if (!error && response.statusCode == 200) {
3030
var $ = cheerio.load(body.toString());
3131
console.log($("title").text());
@@ -37,12 +37,12 @@ switch (index) {
3737
case 2:
3838
async.eachSeries(
3939
objs,
40-
function(obj, callback) {
41-
doSomething(obj, function() {
40+
function (obj, callback) {
41+
doSomething(obj, function () {
4242
callback("err");
4343
});
4444
},
45-
function(err) {
45+
function (err) {
4646
console.log("err is:" + err);
4747
}
4848
);
@@ -55,11 +55,11 @@ switch (index) {
5555
host: "localhost",
5656
user: "root",
5757
password: "root",
58-
database: "crawl"
58+
database: "crawl",
5959
});
6060
connection.connect();
6161
// 查询
62-
connection.query("SELECT * FROM tb_crawl_boat_ticket", function(
62+
connection.query("SELECT * FROM tb_crawl_boat_ticket", function (
6363
err,
6464
rows,
6565
fields
@@ -80,7 +80,7 @@ switch (index) {
8080
times.push(i);
8181
}
8282
rule.second = times;
83-
schedule.scheduleJob(rule, function() {
83+
schedule.scheduleJob(rule, function () {
8484
console.log("开始执行" + new Date());
8585
});
8686
break;

‎news.js ‎lesson11/news.js

+16-27
Original file line number | Diff line number | Diff line change
@@ -14,45 +14,36 @@ function fetchPage(x) {
1414
function startRequest(x) {
1515
//采用http模块向服务器发起一次get请求
1616
http
17-
.get(x, function(res) {
17+
.get(x, function (res) {
1818
var html = ""; //用来存储请求网页的整个html内容
1919
var titles = [];
2020
res.setEncoding("utf-8"); //防止中文乱码
2121
//监听data事件,每次取一块数据
22-
res.on("data", function(chunk) {
22+
res.on("data", function (chunk) {
2323
html += chunk;
2424
});
2525
//监听end事件,如果整个网页内容的html都获取完毕,就执行回调函数
26-
res.on("end", function() {
26+
res.on("end", function () {
2727
var $ = cheerio.load(html); //采用cheerio模块解析html
2828

29-
var time = $(".article-info a:first-child")
30-
.next()
31-
.text()
32-
.trim();
29+
var time = $(".article-info a:first-child").next().text().trim();
3330

3431
var news_item = {
3532
//获取文章的标题
36-
title: $("div.article-title a")
37-
.text()
38-
.trim(),
33+
title: $("div.article-title a").text().trim(),
3934
//获取文章发布的时间
4035
Time: time,
4136
//获取当前文章的url
4237
link:
4338
"http://www.ss.pku.edu.cn" + $("div.article-title a").attr("href"),
4439
//获取供稿单位
45-
author: $("[title=供稿]")
46-
.text()
47-
.trim(),
40+
author: $("[title=供稿]").text().trim(),
4841
//i是用来判断获取了多少篇文章
49-
i: (i = i + 1)
42+
i: (i = i + 1),
5043
};
5144

5245
console.log(news_item); //打印新闻信息
53-
var news_title = $("div.article-title a")
54-
.text()
55-
.trim();
46+
var news_title = $("div.article-title a").text().trim();
5647

5748
savedContent($, news_title); //存储每篇文章的内容及文章标题
5849

@@ -68,21 +59,23 @@ function startRequest(x) {
6859
}
6960
});
7061
})
71-
.on("error", function(err) {
62+
.on("error", function (err) {
7263
console.log(err);
7364
});
7465
}
7566
//该函数的作用:在本地存储所爬取的新闻内容资源
7667
function savedContent($, news_title) {
77-
$(".article-content p").each(function(index, item) {
68+
$(".article-content p").each(function (index, item) {
7869
var x = $(this).text();
7970

8071
var y = x.substring(0, 2).trim();
8172

8273
if (y == "") {
8374
x = x + "\n";
8475
//将新闻文本内容一段一段添加到/data文件夹下,并用新闻的标题来命名文件
85-
fs.appendFile("./data/" + news_title + ".txt", x, "utf-8", function(err) {
76+
fs.appendFile("./data/" + news_title + ".txt", x, "utf-8", function (
77+
err
78+
) {
8679
if (err) {
8780
console.log(err);
8881
}
@@ -92,12 +85,8 @@ function savedContent($, news_title) {
9285
}
9386
//该函数的作用:在本地存储所爬取到的图片资源
9487
function savedImg($, news_title) {
95-
$(".article-content img").each(function(index, item) {
96-
var img_title = $(this)
97-
.parent()
98-
.next()
99-
.text()
100-
.trim(); //获取图片的标题
88+
$(".article-content img").each(function (index, item) {
89+
var img_title = $(this).parent().next().text().trim(); //获取图片的标题
10190
if (img_title.length > 35 || img_title == "") {
10291
img_title = "Null";
10392
}
@@ -106,7 +95,7 @@ function savedImg($, news_title) {
10695
var img_src = "http://www.ss.pku.edu.cn" + $(this).attr("src"); //获取图片的url
10796

10897
//采用request模块,向服务器发起一次请求,获取图片资源
109-
request.head(img_src, function(err, res, body) {
98+
request.head(img_src, function (err, res, body) {
11099
if (err) {
111100
console.log(err);
112101
}

0 commit comments

Comments
 (0)
Please sign in to comment.