数据侠也爱八卦。近日,陈老师不知因何事忽然在微博上骂女神志玲姐姐,引来网友们的热闹围观,导致前几天风风火火的汪峰
的前妻吸毒的事件顿时落下帷幕,汪峰老师好不容易上了一次头条,就这么被硬生生地扯下来了。作为向来用数据说话的数据侠,如何分析热点事件背后的数
(yu)据(le)意(ba)义(gua)呢?这一次数据侠KEN用R语言分析了微博上粉丝的舆论,通过词云可以发现,网友对于这种无端端撕B的行为总的
来说就是4个字:“你”“的”“不”“是”!
library(XML);
library(RCurl);
library(RJSONIO);
# Download the comments of one Weibo status page by page and save every
# non-empty page as a CSV file named data/result_<times>_<page>.txt.
#
# Loop contract:
#   * A page with at least one comment is written to disk and `page` advances.
#   * An empty page (or a failed request) increases `sleepTime` by one second;
#     once `sleepTime` exceeds 10 the loop stops.
#   * The script pauses `sleepTime` seconds after every request.

page <- 1       # next comment page to request
times <- 3      # run label embedded in the output file names
GoOn <- TRUE    # loop flag; cleared after too many empty responses
sleepTime <- 1  # seconds to wait between requests; grows on empty pages

# Output column name -> key inside the `user` object of a comment.
# `text` is read from the comment itself and appended as the last column.
user_fields <- c(
  id = "idstr",
  gender = "gender",
  followers_count = "followers_count",
  friends_count = "friends_count",
  pagefriends_count = "pagefriends_count",
  statuses_count = "statuses_count",
  favourites_count = "favourites_count",
  created_at = "created_at",
  verified = "verified",
  verified_type = "verified_type",
  verified_reason = "verified_reason",
  verified_trade = "verified_trade",
  lang = "lang",
  urank = "urank",
  screen_name = "screen_name",
  name = "name",
  location = "location",
  description = "description"
)

# Return x[[key]] when `x` has an element with exactly that name, else NULL.
pluck_or_null <- function(x, key) {
  if (key %in% names(x)) x[[key]] else NULL
}

# Turn one parsed JSON value into a single character cell. A missing or
# empty value becomes NA so that every row keeps the same number of columns
# (c() would silently drop NULL and misalign or break the row).
as_cell <- function(value) {
  if (length(value) == 0L) {
    return(NA_character_)
  }
  as.character(value)[1L]
}

# Flatten one comment into a named character vector, one element per column.
comment_to_row <- function(comment) {
  user <- pluck_or_null(comment, "user")
  cells <- vapply(
    user_fields,
    function(key) as_cell(pluck_or_null(user, key)),
    character(1)
  )
  c(cells, text = as_cell(pluck_or_null(comment, "text")))
}

# write.csv() does not create missing directories.
dir.create("data", showWarnings = FALSE)

while (GoOn) {
  url <- paste0(
    "https://api.weibo.com/2/comments/show.json?",
    "id=4001968182199220&",
    "page=", page, "&",
    "access_token=这里很私密,就是通过验证的token,我当然不会告诉你啦"
  )
  # NOTE(review): this prints the access token to the console/log.
  print(url)

  # NOTE(review): ssl.verifypeer = FALSE disables TLS certificate
  # verification; kept as in the original, but consider enabling it.
  # A transport or parse failure is reported and then handled like an empty
  # page, so it goes through the same back-off path instead of aborting.
  commentJSON <- tryCatch(
    {
      commentJSONString <- getURL(
        url,
        .opts = list(ssl.verifypeer = FALSE)
      )
      fromJSON(commentJSONString)
    },
    error = function(e) {
      message("Request for page ", page, " failed: ", conditionMessage(e))
      NULL
    }
  )

  comments <- pluck_or_null(commentJSON, "comments")
  len <- length(comments)
  print(len)

  if (len == 0) {
    print("需要休息一下下")
    sleepTime <- sleepTime + 1
    if (sleepTime > 10) {
      GoOn <- FALSE
    }
  } else {
    # One character row per comment, bound into a single table at once
    # rather than growing a data frame row by row.
    rows <- lapply(comments, comment_to_row)
    result <- as.data.frame(do.call(rbind, rows), stringsAsFactors = FALSE)

    # write.csv() always emits the header row; its `col.names` argument is
    # ignored with a warning, so it is not passed.
    write.csv(
      result,
      file = file.path("data", paste0("result_", times, "_", page, ".txt")),
      row.names = FALSE,
      fileEncoding = "UTF-8"
    )
    page <- page + 1
  }

  Sys.sleep(sleepTime)
}