플러그인 48: HTML 파일을 RSS 파일로 변환

<?php // Plug-in 48: HTML To RSS

// This is an executable example with additional code supplied
// To obtain just the plug-ins please click on the Download link

// Demo driver: downloads one web page, converts it with
// PIPHP_HTMLToRSS() and sends the result to the client as XML.

$url  = "http://www.mhprofessional.com/";
$html = file_get_contents($url);

// file_get_contents() returns false when the page cannot be
// retrieved. Report that instead of serving a feed built from
// nothing.
if ($html === false)
{
   header('HTTP/1.1 502 Bad Gateway');
   header('Content-Type: text/plain');
   exit("Unable to retrieve '$url'");
}

$title       = "RSS version of '$url'";
$description = "The website '$url' converted to an RSS feed";

// NOTE(review): this literal looks like the placeholder left behind
// by an e-mail obfuscator rather than a real address. Replace it
// with the actual webmaster address before publishing the feed.
$webmaster   = "[email protected]";
$copyright   = "Translator Copyright 2009 pluginphp.com";

header('Content-Type: text/xml');
echo PIPHP_HTMLToRSS($html, $title, $description, $url,
   $webmaster, $copyright);

function PIPHP_HTMLToRSS($html, $title, $description, $url,
   $webmaster, $copyright)
{
   // Plug-in 48: HTML To RSS
   //
   // This plug-in takes a string containing a complete HTML
   // page and turns it into RSS format which is returned. Every
   // heading (<h1>, <h2>, ...) in the page starts a new <item>
   // whose <title> is the heading text and whose <description>
   // is the entity-escaped HTML that follows it. Anything before
   // the first heading goes into a leading item titled $title.
   // The arguments required are:
   //
   //    $html:        HTML to convert to RSS
   //    $title:       Title to use
   //    $description: Description to use
   //    $url:         URL to link to (generally same as the
   //                  HTML source); also used to resolve
   //                  relative links and as every item's <guid>
   //    $webmaster:   Webmaster contact email address
   //    $copyright:   Copyright details
   //
   // $title, $description, $url, $webmaster and $copyright are
   // inserted into the XML verbatim, so they must already be
   // safe to use as XML text.
   //
   // Returns the RSS 2.0 document as a string.

   // RSS requires RFC 822 dates; DATE_RSS ends in a numeric
   // offset (+0000) rather than a timezone name.
   $date = date(DATE_RSS);
   $html = (string) $html;

   // Hide every ampersand behind a placeholder so that the href
   // values read back from the DOM are byte-identical to the
   // text in $html and can be found again with str_replace()
   $html = str_replace('&amp;', '&',         $html);
   $html = str_replace('&',     '!!**1**!!', $html);

   $links = array();

   // Nothing to parse in an empty page (and DOMDocument refuses
   // an empty string outright on recent PHP versions)
   if (trim($html) !== '')
   {
      $dom = new DOMDocument();
      @$dom->loadHTML($html); // real-world markup is noisy
      $xpath = new DOMXPath($dom);
      $hrefs = $xpath->evaluate("/html/body//a");

      for ($j = 0 ; $j < $hrefs->length ; ++$j)
         $links[] = $hrefs->item($j)->getAttribute('href');
   }

   $links = array_unique($links);
   sort($links);

   // Pass 1: swap each distinct href for a numbered token,
   // remembering the absolute URL that belongs to the token
   $map = array();

   foreach ($links as $link)
   {
      if ($link === '') continue;

      $token = '!!' . count($map) . '!!';
      $temp  = str_replace('!!**1**!!', '&', $link);

      $html  = str_replace(
         array("href=\"$link\"",  "href='$link'",  "href=$link"),
         array("href=\"$token\"", "href='$token'", "href=$token"),
         $html);

      $map[$token] = PIPHP_RelToAbsURL($url, $temp);
   }

   // Pass 2: put the absolute URLs in. strtr() does this in a
   // single sweep, so text inside an inserted URL is never
   // mistaken for another token.
   if ($map) $html = strtr($html, $map);

   $html = str_replace('!!**1**!!', '&', $html);
   $html = preg_replace('/[\s]+/', ' ', $html);
   $html = preg_replace('/<script[^>]*>.*?<\/script>/i', '',
      $html);
   $html = preg_replace('/<style[^>]*>.*?<\/style>/i', '',
      $html);

   // Rename all headings to a bare <h>...</h> BEFORE stripping
   // tags: strip_tags() matches exact tag names, so the '<h>'
   // entry below would not let <h1>..<h6> through.
   $html = preg_replace('/<h[1-7][^>]*?>/i', '<h>',  $html);
   $html = preg_replace('/<\/h[1-7]\s*>/i',  '</h>', $html);

   $ok   = '<a><i><b><u><s><h><img><div><span><table><tr>';
   $ok  .= '<th><tr><td><br><p><ul><ol><li>';
   $html = strip_tags($html, $ok);

   // Escape the markup for use inside <description>. Unlike
   // htmlentities() this emits only entities XML itself defines.
   $html = htmlspecialchars($html, ENT_QUOTES | ENT_SUBSTITUTE,
      'UTF-8');

   // The heading markers are escaped now too. Literal (not
   // regex) replacement keeps '$' or '\' in $url from being
   // read as back-references.
   $html = str_ireplace('&lt;h&gt;',
      "</description></item>\n<item><title>", $html);
   $html = str_ireplace('&lt;/h&gt;',
      "</title><guid>$url</guid><description>", $html);

   return <<<_END
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel>
<generator>Pluginphp.com: plug-in 48</generator>
<title>$title</title><link>$url</link>
<description>$description</description>
<language>en</language>
<webMaster>$webmaster</webMaster>
<copyright>$copyright</copyright>
<pubDate>$date</pubDate>
<lastBuildDate>$date</lastBuildDate>
<item><title>$title</title>
<guid>$url</guid>
<description>$html</description></item></channel></rss>
_END;
}

// The below function is repeated here to ensure that it's
// available to the main function which relies on it

function PIPHP_RelToAbsURL($page, $url)
{
   // Plug-in 21: Relative To Absolute URL
   //
   // This plug-in accepts the absolute URL of a web page
   // and a link featured within that page. The link is then
   // turned into an absolute URL which can be independently
   // accessed. Arguments are:
   //
   //    $page: The web page containing the URL (http:// or
   //           https://; for anything else $url is returned
   //           unchanged)
   //    $url:  The URL to convert to absolute
   //
   // Links that already carry a scheme (http:, https:, mailto:,
   // ...) are returned untouched.

   if (!preg_match('#^https?://#i', $page)) return $url;
   if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $url)) return $url;

   $parse = parse_url($page);

   if ($parse === false || !isset($parse['host'])) return $url;

   $root = $parse['scheme'] . "://" . $parse['host'];

   if (isset($parse['port'])) $root .= ':' . $parse['port'];

   // Protocol-relative link: borrow only the page's scheme
   if (substr($url, 0, 2) == '//')
      return $parse['scheme'] . ':' . $url;

   // Link relative to the site root
   if (substr($url, 0, 1) == '/') return $root . $url;

   // Link relative to the page's directory: keep everything up
   // to and including the last '/' after the scheme separator
   $skip = strpos($page, '://') + 3;
   $p    = strrpos(substr($page, $skip), '/');

   if ($p) $base = substr($page, 0, $p + $skip + 1);
   else    $base = "$page/";

   return $base . $url;
}

?>

플러그인 설명:
이 플러그인은 HTML 문서와 기타 관련 매개 변수를 받아 올바른 형식의 RSS 문서를 반환합니다. 다음과 같은 매개 변수가 필요합니다.
$html 변환이 필요한 HTML 문서
$title RSS 파일의 제목
$description RSS 파일 설명
$url RSS 파일 링크의 URL
$webmaster 웹 사이트 관리자의 Email 주소
$copyright 저작권 정보

좋은 웹페이지 즐겨찾기