/**
 * Collects Sina Tech articles and stores them through M('post').
 *
 * Flow, as far as the visible code shows:
 *  1. Reads the number of list pages to crawl from $_POST['get_post_page_num']
 *     (defaults to 1 when the value is empty or zero).
 *  2. For each list page, downloads the roll page with CurlGetPage(), takes the
 *     contents of the element with class "d_list_txt", converts it from GB2312
 *     to UTF-8 and splits it into <li> items (duplicates removed).
 *  3. For each item, builds a datetime string from the "c_time" span, downloads
 *     the article page, cleans up its markup, extracts the "artibody" element
 *     and adds a row with the content and datetime to the post model.
 *  4. Compares the post count before and after the crawl and echoes a JSON
 *     string with "success":1 and a "msg" describing how many posts were added.
 *
 * NOTE(review): several statements in the per-item section (title/link
 * extraction, the CurlGetPage() call for the article, and the check that guards
 * the insert) appear to be cut off mid-line — verify them against a pristine
 * copy of this file before relying on this method.
 * NOTE(review): M() and CurlGetPage() are defined elsewhere; M() is presumably
 * the ThinkPHP model helper — confirm.
 *
 * @return void Output is written directly with echo.
 */
public function sina_tech() {
/* Number of list pages to crawl, taken from the POST body; falls back to 1. */
$page_num = intval($_POST['get_post_page_num']);
if (empty($page_num)) $page_num = 1;
/* Post count before crawling, used at the end to report how many were added. */
$post_count_a = M('post')->count();
/* Crawl each list page in turn. */
for ($page = 1; $page <= $page_num; $page++) {
// NOTE(review): everything after '#' in this URL (including page=) is a URL
// fragment — confirm CurlGetPage() actually sends it, otherwise every
// iteration fetches the same list page.
$fullpage = CurlGetPage('http://roll.tech.sina.com.cn/s/channel.php?ch=05#col=30&spec=&type=&ch=05&k=&offset_page=0&offset_num=0&num=5&asc=&page='.$page);
// Capture the inner HTML of the <div> carrying class "d_list_txt".
preg_match_all('/<div\s+.*\s+class="d_list_txt".*>\s+(.*)\s+<\/div>/Us', $fullpage, $match);
// Only the first match is used, converted from GB2312 to UTF-8.
// NOTE(review): $match[1][0] is read without checking that anything matched.
$fullpage = iconv("GB2312", "UTF-8", $match[1][0]);//echo $data1;die;
// Split the list into its <li> items; array_unique() below drops repeated items.
preg_match_all('/<li>(.*)<\/li>/isU', $fullpage, $in_li_tags);
foreach (array_unique($in_li_tags[1]) as $row) {
/* ti
preg_match_all('/<a href="[^"]*" target="_blank">(.*)<\/a>/', $row, $ti
$ti
/* li
preg_match_all('/href="([^"]*)"/', $row, $li
$li
/* DATE */
preg_match_all('/<span class="c_time">(.*)<\/span>/i', $row, $date);
// Build the stored datetime: current year + "-" + the captured c_time text + ":00".
// NOTE(review): the year always comes from the server clock, not from the page.
$date = date("Y-", time()) . $date[1][0] . ':00';
// echo $ti
/* Fetch the article page itself. */
$fullpage_post = CurlGetPage($li
/* Unwrap img_wrapper divs (keeping their contents) and remove content-page divs. */
$fullpage_post = preg_replace('/<div class="img_wrapper">(.*)<\/div>/isU', '${1}', $fullpage_post);
$fullpage_post = preg_replace('/<div class="content-page" >(.*)<\/div>/Us', '', $fullpage_post);
//echo htmlspecialchars($fullpage_post);die;
/* Capture the inner HTML of the <div> with id="artibody" (the article content). */
preg_match_all('/<div\s+.*\s+id="artibody".*>\s+(.*)\s+<\/div>/Us', $fullpage_post, $post_content);
/* Strip <a> tags but keep their inner text. */
// NOTE(review): $post_content[1][0] is read without checking that anything matched.
$post_content = preg_replace("/<a[^>]*>(.*)<\/a>/isU", '${1}', $post_content[1][0]);
// echo '<h1>'.$ti
/* Save the article to the database. */
$post_ti
if ($post_ti
$dataMySql["ti
$dataMySql["content"] = $post_content;
$dataMySql["datetime"] = $date;
M('post')->add($dataMySql);
}
}
}
/* LAST COUNT */
$post_count_b = M('post')->count();
$post_add_num = $post_count_b - $post_count_a;
/* CALLBACK */
if ($post_count_a == $post_count_b) {
echo '{"success":1,"msg":"文章数无变化"}';
} else {
echo '{"success":1,"msg":"成功采集 ' . $post_add_num . ' 篇文章"}';
}
}
