觉得 落网 的音乐很不错,正好又在学习 R语言的爬虫,顺便就写了一个批量下载脚本。
## 首先加载 R包
library(RCurl)
library(rvest)
## 下载函数
Luo_Volume <- function(Volume){
V_url <- read_html(paste("http://www.luoo.net/music/", Volume, sep = ""))
Title <- html_text(html_nodes(V_url, "a.trackname.btn-play"))
ID <- substr(Title, 1, 2)
j <- 1
for (i in ID){
Song_URL <- paste("http://luoo-mp3.kssws.ks-cdn.com/low/luoo/radio801/",
i, ".mp3", sep = "")
Song <- getURLContent(Song_URL)
Name <- paste(Volume, "-", Title[j], ".mp3", sep = "")
save(Song, file = Name, compress = F)
j <- j + 1
## 随机休眠 2-8 秒,防止反爬虫,可以注释掉
Sleep <- sample(2:8, 1)
Sys.sleep(Sleep)
print(c(Name, paste("Wait", Sleep, "seconds")))
}
}
## 大展伸手的时候到了!
### 下载单个期刊,如,801期,http://www.luoo.net/music/801
Luo_Volume(801)
### 同时下载多个期刊,如 800 到 811,直接
for (i in 800:811){Luo_Volume(i)}
注:某些特殊后缀资源不能下载,如 http://luoo-mp3.kssws.ks-cdn.com/low/2014/0506_01.mp3
## 首先生成每日推荐的 ID
DayID <- function(Start = "2014-06-05", End = Sys.Date()){
S <- as.Date(Start)
E <- as.Date(End)
Total_days <- E - S
ID <- vector(length = Total_days)
for (i in 1:Total_days){
ID[i] <- format(S + i, "%Y%m%d")
}
ID <- as.numeric(ID)
}
## 下载函数
Download <- function(ID){
for (i in ID){
Song_URL <- paste("http://luoo-mp3.kssws.ks-cdn.com/low/chinese/",
i, ".mp3", sep = "")
Song <- getURLContent(Song_URL)
Name <- paste(i, ".mp3", sep = "")
if (object.size(Song) > 100) {
save(Song, file = Name, compress = F)
## 随机休眠 2-8 秒,防止反爬虫,可以注释掉
Sleep <- sample(2:8, 1)
print(c(Name, paste("Wait", Sleep, "seconds")))
Sys.sleep(Sleep)
}
}
}
## 使用方法
### 自选指定日的推荐曲目
ID <- 20160201
Download(ID)
### 多个日期,如二月和三月,月份之间用分号隔开
ID <- c(20160201:20160229, 20160301:20160331)
Download(ID)
### 选择起止日期,下载两个日期之间的全部推荐单曲,如 2015-01-01 到 2015-01-10 之间
ID <- DayID("2015-01-01", "2015-01-10");
Download(ID)
### 丧心病狂模式,下载所有日期里的推荐曲目
ID <- DayID()
Download(ID)