Changeset 52 for trunk/includes
- Timestamp:
- 11/08/05 10:25:54 (6 years ago)
- Location:
- trunk/includes/rss
- Files:
-
- 2 added
- 1 removed
- 4 modified
-
extlib (added)
-
extlib/Snoopy.class.inc.php (added)
-
net (deleted)
-
rss_cache.inc.php (modified) (2 diffs)
-
rss_fetch.inc.php (modified) (5 diffs)
-
rss_parse.inc.php (modified) (1 diff)
-
rss_utils.inc.php (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/includes/rss/rss_cache.inc.php
r1 r52 3 3 * Project: MagpieRSS: a simple RSS integration tool 4 4 * File: rss_cache.inc, a simple, rolling(no GC), cache 5 * for RSS objects, keyed on URL.5 * for RSS objects, keyed on URL. 6 6 * Author: Kellan Elliott-McCrea <kellan@protest.net> 7 * Version: 0.518 * License: GPL7 * Version: 0.51 8 * License: GPL 9 9 * 10 10 * The lastest version of MagpieRSS can be obtained from: … … 18 18 19 19 class RSSCache { 20 var $BASE_CACHE = './cache'; // where the cache files are stored 21 var $MAX_AGE = 3600; // when are files stale, default one hour 22 var $ERROR = ""; // accumulate error messages 23 24 function RSSCache ($base='', $age='') { 25 if ( $base ) { 26 $this->BASE_CACHE = $base; 20 var $BASE_CACHE = './cache'; // where the cache files are stored 21 var $MAX_AGE = 3600; // when are files stale, default one hour 22 var $ERROR = ""; // accumulate error messages 23 24 function RSSCache ($base='', $age='') { 25 if ( $base ) { 26 $this->BASE_CACHE = $base; 27 } 28 if ( $age ) { 29 $this->MAX_AGE = $age; 30 } 31 32 // attempt to make the cache directory 33 if ( ! file_exists( $this->BASE_CACHE ) ) { 34 $status = @mkdir( $this->BASE_CACHE, 0755 ); 35 36 // if make failed 37 if ( ! $status ) { 38 $this->error( 39 "Cache couldn't make dir '" . $this->BASE_CACHE . "'." 40 ); 41 } 42 } 43 } 44 45 /*=======================================================================*\ 46 Function: set 47 Purpose: add an item to the cache, keyed on url 48 Input: url from wich the rss file was fetched 49 Output: true on sucess 50 \*=======================================================================*/ 51 function set ($url, $rss) { 52 $this->ERROR = ""; 53 $cache_file = $this->file_name( $url ); 54 $fp = @fopen( $cache_file, 'w' ); 55 56 if ( ! $fp ) { 57 $this->error( 58 "Cache unable to open file for writing: $cache_file" 59 ); 60 return 0; 61 } 62 63 64 $data = $this->serialize( $rss ); 65 fwrite( $fp, $data ); 66 fclose( $fp ); 67 68 return $cache_file; 69 } 70 71 /*=======================================================================*\ 72 Function: get 73 Purpose: fetch an item from the cache 74 Input: url from wich the rss file was fetched 75 Output: cached object on HIT, false on MISS 76 \*=======================================================================*/ 77 function get ($url) { 78 $this->ERROR = ""; 79 $cache_file = $this->file_name( $url ); 80 81 if ( ! file_exists( $cache_file ) ) { 82 $this->debug( 83 "Cache doesn't contain: $url (cache file: $cache_file)" 84 ); 85 return 0; 86 } 87 88 $fp = @fopen($cache_file, 'r'); 89 if ( ! $fp ) { 90 $this->error( 91 "Failed to open cache file for reading: $cache_file" 92 ); 93 return 0; 94 } 95 96 if ($filesize = filesize($cache_file) ) { 97 $data = fread( $fp, filesize($cache_file) ); 98 $rss = $this->unserialize( $data ); 99 100 return $rss; 101 } 102 103 return 0; 104 } 105 106 /*=======================================================================*\ 107 Function: check_cache 108 Purpose: check a url for membership in the cache 109 and whether the object is older then MAX_AGE (ie. STALE) 110 Input: url from wich the rss file was fetched 111 Output: cached object on HIT, false on MISS 112 \*=======================================================================*/ 113 function check_cache ( $url ) { 114 $this->ERROR = ""; 115 $filename = $this->file_name( $url ); 116 117 if ( file_exists( $filename ) ) { 118 // find how long ago the file was added to the cache 119 // and whether that is longer then MAX_AGE 120 $mtime = filemtime( $filename ); 121 $age = time() - $mtime; 122 if ( $this->MAX_AGE > $age ) { 123 // object exists and is current 124 return 'HIT'; 125 } 126 else { 127 // object exists but is old 128 return 'STALE'; 129 } 130 } 131 else { 132 // object does not exist 133 return 'MISS'; 134 } 135 } 136 137 function cache_age( $cache_key ) { 138 $filename = $this->file_name( $url ); 139 if ( file_exists( $filename ) ) { 140 $mtime = filemtime( $filename ); 141 $age = time() - $mtime; 142 return $age; 27 143 } 28 if ( $age ) { 29 $this->MAX_AGE = $age; 30 } 31 32 // attempt to make the cache directory 33 if ( ! file_exists( $this->BASE_CACHE ) ) { 34 $status = @mkdir( $this->BASE_CACHE, 0755 ); 35 36 // if make failed 37 if ( ! $status ) { 38 $this->error( 39 "Cache couldn't make dir '" . $this->BASE_CACHE . "'." 40 ); 41 } 144 else { 145 return -1; 42 146 } 43 147 } 44 148 45 149 /*=======================================================================*\ 46 Function: set 47 Purpose: add an item to the cache, keyed on url 48 Input: url from wich the rss file was fetched 49 Output: true on sucess 50 \*=======================================================================*/ 51 function set ($url, $rss) { 52 $this->ERROR = ""; 53 $cache_file = $this->file_name( $url ); 54 $fp = @fopen( $cache_file, 'w' ); 55 56 if ( ! $fp ) { 57 $this->error( 58 "Cache unable to open file for writing: $cache_file" 59 ); 60 return 0; 61 } 62 63 64 $data = $this->serialize( $rss ); 65 fwrite( $fp, $data ); 66 fclose( $fp ); 67 68 return $cache_file; 69 } 70 71 /*=======================================================================*\ 72 Function: get 73 Purpose: fetch an item from the cache 74 Input: url from wich the rss file was fetched 75 Output: cached object on HIT, false on MISS 76 \*=======================================================================*/ 77 function get ($url) { 78 $this->ERROR = ""; 79 $cache_file = $this->file_name( $url ); 80 81 if ( ! file_exists( $cache_file ) ) { 82 $this->debug( 83 "Cache doesn't contain: $url (cache file: $cache_file)" 84 ); 85 return 0; 86 } 87 88 $fp = @fopen($cache_file, 'r'); 89 if ( ! $fp ) { 90 $this->error( 91 "Failed to open cache file for reading: $cache_file" 92 ); 93 return 0; 94 } 95 96 $data = fread( $fp, filesize($cache_file) ); 97 $rss = $this->unserialize( $data ); 98 99 return $rss; 100 } 101 102 /*=======================================================================*\ 103 Function: check_cache 104 Purpose: check a url for membership in the cache 105 and whether the object is older then MAX_AGE (ie. STALE) 106 Input: url from wich the rss file was fetched 107 Output: cached object on HIT, false on MISS 108 \*=======================================================================*/ 109 function check_cache ( $url ) { 110 $this->ERROR = ""; 111 $filename = $this->file_name( $url ); 112 113 if ( file_exists( $filename ) ) { 114 // find how long ago the file was added to the cache 115 // and whether that is longer then MAX_AGE 116 $mtime = filemtime( $filename ); 117 $age = time() - $mtime; 118 if ( $this->MAX_AGE > $age ) { 119 // object exists and is current 120 return 'HIT'; 121 } 122 else { 123 // object exists but is old 124 return 'STALE'; 125 } 126 } 127 else { 128 // object does not exist 129 return 'MISS'; 130 } 131 } 132 133 /*=======================================================================*\ 134 Function: serialize 135 \*=======================================================================*/ 136 function serialize ( $rss ) { 137 return serialize( $rss ); 138 } 139 140 /*=======================================================================*\ 141 Function: unserialize 142 \*=======================================================================*/ 143 function unserialize ( $data ) { 144 return unserialize( $data ); 145 } 146 147 /*=======================================================================*\ 148 Function: file_name 149 Purpose: map url to location in cache 150 Input: url from wich the rss file was fetched 151 Output: a file name 152 \*=======================================================================*/ 153 function file_name ($url) { 154 $filename = md5( $url ); 155 return join( DIRECTORY_SEPARATOR, array( $this->BASE_CACHE, $filename ) ); 156 } 157 158 /*=======================================================================*\ 159 Function: error 160 Purpose: register error 161 \*=======================================================================*/ 162 function error ($errormsg, $lvl=E_USER_WARNING) { 163 // append PHP's error message if track_errors enabled 164 if ( isset($php_errormsg) ) { 165 $errormsg .= " ($php_errormsg)"; 166 } 167 $this->ERROR = $errormsg; 168 if ( MAGPIE_DEBUG ) { 169 trigger_error( $errormsg, $lvl); 170 } 171 else { 172 error_log( $errormsg, 0); 173 } 174 } 175 176 function debug ($debugmsg, $lvl=E_USER_NOTICE) { 177 if ( MAGPIE_DEBUG ) { 178 $this->error("MagpieRSS [debug] $debugmsg", $lvl); 179 } 180 } 150 Function: serialize 151 \*=======================================================================*/ 152 function serialize ( $rss ) { 153 return serialize( $rss ); 154 } 155 156 /*=======================================================================*\ 157 Function: unserialize 158 \*=======================================================================*/ 159 function unserialize ( $data ) { 160 return unserialize( $data ); 161 } 162 163 /*=======================================================================*\ 164 Function: file_name 165 Purpose: map url to location in cache 166 Input: url from wich the rss file was fetched 167 Output: a file name 168 \*=======================================================================*/ 169 function file_name ($url) { 170 $filename = md5( $url ); 171 return join( DIRECTORY_SEPARATOR, array( $this->BASE_CACHE, $filename ) ); 172 } 173 174 /*=======================================================================*\ 175 Function: error 176 Purpose: register error 177 \*=======================================================================*/ 178 function error ($errormsg, $lvl=E_USER_WARNING) { 179 // append PHP's error message if track_errors enabled 180 if ( isset($php_errormsg) ) { 181 $errormsg .= " ($php_errormsg)"; 182 } 183 $this->ERROR = $errormsg; 184 if ( MAGPIE_DEBUG ) { 185 trigger_error( $errormsg, $lvl); 186 } 187 else { 188 error_log( $errormsg, 0); 189 } 190 } 191 192 function debug ($debugmsg, $lvl=E_USER_NOTICE) { 193 if ( MAGPIE_DEBUG ) { 194 $this->error("MagpieRSS [debug] $debugmsg", $lvl); 195 } 196 } 181 197 182 198 } -
trunk/includes/rss/rss_fetch.inc.php
r1 r52 3 3 * Project: MagpieRSS: a simple RSS integration tool 4 4 * File: rss_fetch.inc, a simple functional interface 5 to fetching and parsing RSS files, via the6 function fetch_rss()5 to fetching and parsing RSS files, via the 6 function fetch_rss() 7 7 * Author: Kellan Elliott-McCrea <kellan@protest.net> 8 * License: GPL8 * License: GPL 9 9 * 10 10 * The lastest version of MagpieRSS can be obtained from: … … 21 21 // with thanks to rajiv and smarty 22 22 if (!defined('DIR_SEP')) { 23 define('DIR_SEP', DIRECTORY_SEPARATOR);23 define('DIR_SEP', DIRECTORY_SEPARATOR); 24 24 } 25 25 … … 32 32 33 33 // for including 3rd party libraries 34 define('MAGPIE_EXTLIB', MAGPIE_DIR . ' net' . DIR_SEP);34 define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP); 35 35 require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc.php'); 36 36 … … 62 62 63 63 64 65 /*=======================================================================*\ 66 Function: fetch_rss: 67 Purpose: return RSS object for the give url 68 maintain the cache 69 Input: url of RSS file 70 Output: parsed RSS object (see rss_parse.inc) 71 72 NOTES ON CACHEING: 73 If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache. 74 75 NOTES ON RETRIEVING REMOTE FILES: 76 If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will 77 return a cached object, and touch the cache object upon recieving a 78 304. 79 80 NOTES ON FAILED REQUESTS: 81 If there is an HTTP error while fetching an RSS object, the cached 82 version will be return, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off) 83 \*=======================================================================*/ 84 85 define('MAGPIE_VERSION', '0.61'); 86 define('MAGPIE_CACHE_ON','0'); 64 /*=======================================================================*\ 65 Function: fetch_rss: 66 Purpose: return RSS object for the give url 67 maintain the cache 68 Input: url of RSS file 69 Output: parsed RSS object (see rss_parse.inc) 70 71 NOTES ON CACHEING: 72 If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache. 73 74 NOTES ON RETRIEVING REMOTE FILES: 75 If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will 76 return a cached object, and touch the cache object upon recieving a 77 304. 78 79 NOTES ON FAILED REQUESTS: 80 If there is an HTTP error while fetching an RSS object, the cached 81 version will be return, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off) 82 \*=======================================================================*/ 83 84 define('MAGPIE_VERSION', '0.72'); 85 87 86 $MAGPIE_ERROR = ""; 88 87 89 88 function fetch_rss ($url) { 90 // initialize constants 91 init(); 92 93 if ( !isset($url) ) { 94 error("fetch_rss called without a url"); 95 return false; 96 } 97 // if cache is disabled 98 if ( !MAGPIE_CACHE_ON ) { 99 // fetch file, and parse it 100 $resp = _fetch_remote_file( $url ); 101 if ( is_success( $resp->status ) ) { 102 return _response_to_rss( $resp ); 103 } 104 else { 105 error("Failed to fetch $url and cache is off"); 106 return false; 107 } 108 } 109 // else cache is ON 110 else { 111 // Flow 112 // 1. check cache 113 // 2. if there is a hit, make sure its fresh 114 // 3. if cached obj fails freshness check, fetch remote 115 // 4. if remote fails, return stale object, or error 116 117 $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE ); 118 119 if (MAGPIE_DEBUG and $cache->ERROR) { 120 debug($cache->ERROR, E_USER_WARNING); 121 } 122 123 124 $cache_status = 0; // response of check_cache 125 $request_headers = array(); // HTTP headers to send with fetch 126 $rss = 0; // parsed RSS object 127 $errormsg = 0; // errors, if any 128 129 if (!$cache->ERROR) { 130 // return cache HIT, MISS, or STALE 131 $cache_status = $cache->check_cache( $url ); 132 } 133 134 // if object cached, and cache is fresh, return cached obj 135 if ( $cache_status == 'HIT' ) { 136 $rss = $cache->get( $url ); 137 if ( isset($rss) and $rss ) { 138 $rss->from_cache = 1; 139 if ( MAGPIE_DEBUG > 1) { 140 debug("MagpieRSS: Cache HIT", E_USER_NOTICE); 141 } 142 return $rss; 143 } 144 } 145 146 // else attempt a conditional get 147 148 // setup headers 149 if ( $cache_status == 'STALE' ) { 150 $rss = $cache->get( $url ); 151 if ( $rss->etag and $rss->last_modified ) { 152 $request_headers['If-None-Match'] = $rss->etag; 153 $request_headers['If-Last-Modified'] = $rss->last_modified; 154 } 155 } 156 157 $resp = _fetch_remote_file( $url, $request_headers ); 158 if (isset($resp) and $resp) { 159 if ($resp->status == '304' ) { 160 // we have the most current copy 161 if ( MAGPIE_DEBUG > 1) { 162 debug("Got 304 for $url"); 163 } 164 // reset cache on 304 (at minutillo insistent prodding) 165 $cache->set($url, $rss); 166 return $rss; 167 } 168 elseif ( is_success( $resp->status ) ) { 169 $rss = _response_to_rss( $resp ); 170 if ( $rss ) { 171 if (MAGPIE_DEBUG > 1) { 172 debug("Fetch successful"); 173 } 174 // add object to cache 175 $cache->set( $url, $rss ); 176 return $rss; 177 } 178 } 179 else { 180 print $resp->status; 181 $errormsg = "Failed to fetch $url. "; 182 if ( $resp->error ) { 183 # compensate for Snoopy's annoying habbit to tacking 184 # on '\n' 185 $http_error = substr($resp->error, 0, -2); 186 $errormsg .= "(HTTP Error: $http_error)"; 187 } 188 else { 189 $errormsg .= "(HTTP Response: " . $resp->response_code .')'; 190 } 191 } 192 } 193 else { 194 $errormsg = "Unable to retrieve RSS file for unknown reasons."; 195 } 196 197 // else fetch failed 198 199 // attempt to return cached object 200 if ($rss) { 201 if ( MAGPIE_DEBUG ) { 202 debug("Returning STALE object for $url"); 203 } 204 return $rss; 205 } 206 207 // else we totally failed 208 error( $errormsg ); 209 210 return false; 211 212 } // end if ( !MAGPIE_CACHE_ON ) { 89 // initialize constants 90 init(); 91 92 if ( !isset($url) ) { 93 error("fetch_rss called without a url"); 94 return false; 95 } 96 97 // if cache is disabled 98 if ( !MAGPIE_CACHE_ON ) { 99 // fetch file, and parse it 100 $resp = _fetch_remote_file( $url ); 101 if ( is_success( $resp->status ) ) { 102 return _response_to_rss( $resp ); 103 } 104 else { 105 error("Failed to fetch $url and cache is off"); 106 return false; 107 } 108 } 109 // else cache is ON 110 else { 111 // Flow 112 // 1. check cache 113 // 2. if there is a hit, make sure its fresh 114 // 3. if cached obj fails freshness check, fetch remote 115 // 4. if remote fails, return stale object, or error 116 117 $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE ); 118 119 if (MAGPIE_DEBUG and $cache->ERROR) { 120 debug($cache->ERROR, E_USER_WARNING); 121 } 122 123 124 $cache_status = 0; // response of check_cache 125 $request_headers = array(); // HTTP headers to send with fetch 126 $rss = 0; // parsed RSS object 127 $errormsg = 0; // errors, if any 128 129 // store parsed XML by desired output encoding 130 // as character munging happens at parse time 131 $cache_key = $url . MAGPIE_OUTPUT_ENCODING; 132 133 if (!$cache->ERROR) { 134 // return cache HIT, MISS, or STALE 135 $cache_status = $cache->check_cache( $cache_key); 136 } 137 138 // if object cached, and cache is fresh, return cached obj 139 if ( $cache_status == 'HIT' ) { 140 $rss = $cache->get( $cache_key ); 141 if ( isset($rss) and $rss ) { 142 // should be cache age 143 $rss->from_cache = 1; 144 if ( MAGPIE_DEBUG > 1) { 145 debug("MagpieRSS: Cache HIT", E_USER_NOTICE); 146 } 147 return $rss; 148 } 149 } 150 151 // else attempt a conditional get 152 153 // setup headers 154 if ( $cache_status == 'STALE' ) { 155 $rss = $cache->get( $cache_key ); 156 if ( $rss and $rss->etag and $rss->last_modified ) { 157 $request_headers['If-None-Match'] = $rss->etag; 158 $request_headers['If-Last-Modified'] = $rss->last_modified; 159 } 160 } 161 162 $resp = _fetch_remote_file( $url, $request_headers ); 163 164 if (isset($resp) and $resp) { 165 if ($resp->status == '304' ) { 166 // we have the most current copy 167 if ( MAGPIE_DEBUG > 1) { 168 debug("Got 304 for $url"); 169 } 170 // reset cache on 304 (at minutillo insistent prodding) 171 $cache->set($cache_key, $rss); 172 return $rss; 173 } 174 elseif ( is_success( $resp->status ) ) { 175 $rss = _response_to_rss( $resp ); 176 if ( $rss ) { 177 if (MAGPIE_DEBUG > 1) { 178 debug("Fetch successful"); 179 } 180 // add object to cache 181 $cache->set( $cache_key, $rss ); 182 return $rss; 183 } 184 } 185 else { 186 $errormsg = "Failed to fetch $url "; 187 if ( $resp->status == '-100' ) { 188 $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)"; 189 } 190 elseif ( $resp->error ) { 191 # compensate for Snoopy's annoying habbit to tacking 192 # on '\n' 193 $http_error = substr($resp->error, 0, -2); 194 $errormsg .= "(HTTP Error: $http_error)"; 195 } 196 else { 197 $errormsg .= "(HTTP Response: " . $resp->response_code .')'; 198 } 199 } 200 } 201 else { 202 $errormsg = "Unable to retrieve RSS file for unknown reasons."; 203 } 204 205 // else fetch failed 206 207 // attempt to return cached object 208 if ($rss) { 209 if ( MAGPIE_DEBUG ) { 210 debug("Returning STALE object for $url"); 211 } 212 return $rss; 213 } 214 215 // else we totally failed 216 error( $errormsg ); 217 218 return false; 219 220 } // end if ( !MAGPIE_CACHE_ON ) { 213 221 } // end fetch_rss() 214 222 215 223 /*=======================================================================*\ 216 Function:error217 Purpose:set MAGPIE_ERROR, and trigger error224 Function: error 225 Purpose: set MAGPIE_ERROR, and trigger error 218 226 \*=======================================================================*/ 219 227 220 228 function error ($errormsg, $lvl=E_USER_WARNING) { 221 global $MAGPIE_ERROR;222 223 // append PHP's error message if track_errors enabled224 if ( isset($php_errormsg) ) {225 $errormsg .= " ($php_errormsg)";226 }227 if ( $errormsg ) {228 $errormsg = "MagpieRSS: $errormsg";229 $MAGPIE_ERROR = $errormsg;230 trigger_error( $errormsg, $lvl); 231 }229 global $MAGPIE_ERROR; 230 231 // append PHP's error message if track_errors enabled 232 if ( isset($php_errormsg) ) { 233 $errormsg .= " ($php_errormsg)"; 234 } 235 if ( $errormsg ) { 236 $errormsg = "MagpieRSS: $errormsg"; 237 $MAGPIE_ERROR = $errormsg; 238 trigger_error( $errormsg, $lvl); 239 } 232 240 } 233 241 234 242 function debug ($debugmsg, $lvl=E_USER_NOTICE) { 235 trigger_error("MagpieRSS [debug] $debugmsg", $lvl);236 } 237 238 /*=======================================================================*\ 239 Function:magpie_error240 Purpose:accessor for the magpie error variable243 trigger_error("MagpieRSS [debug] $debugmsg", $lvl); 244 } 245 246 /*=======================================================================*\ 247 Function: magpie_error 248 Purpose: accessor for the magpie error variable 241 249 \*=======================================================================*/ 242 250 function magpie_error ($errormsg="") { 243 global $MAGPIE_ERROR;244 245 if ( isset($errormsg) and $errormsg ) {246 $MAGPIE_ERROR = $errormsg;247 }248 249 return $MAGPIE_ERROR; 250 } 251 252 /*=======================================================================*\ 253 Function:_fetch_remote_file254 Purpose:retrieve an arbitrary remote file255 Input:url of the remote file256 headers to send along with the request (optional)257 Output: an HTTP response object (see Snoopy.class.inc) 251 global $MAGPIE_ERROR; 252 253 if ( isset($errormsg) and $errormsg ) { 254 $MAGPIE_ERROR = $errormsg; 255 } 256 257 return $MAGPIE_ERROR; 258 } 259 260 /*=======================================================================*\ 261 Function: _fetch_remote_file 262 Purpose: retrieve an arbitrary remote file 263 Input: url of the remote file 264 headers to send along with the request (optional) 265 Output: an HTTP response object (see Snoopy.class.inc) 258 266 \*=======================================================================*/ 259 267 function _fetch_remote_file ($url, $headers = "" ) { 260 // Snoopy is an HTTP client in PHP261 $client = new Snoopy();262 $client->agent = MAGPIE_USER_AGENT;263 $client->read_timeout = MAGPIE_FETCH_TIME_OUT;264 $client->use_gzip = MAGPIE_USE_GZIP;265 if (is_array($headers) ) {266 $client->rawheaders = $headers;267 }268 269 @$client->fetch($url);270 return $client;271 272 } 273 274 /*=======================================================================*\ 275 Function:_response_to_rss276 Purpose:parse an HTTP response object into an RSS object277 Input:an HTTP response object (see Snoopy)278 Output:parsed RSS object (see rss_parse)268 // Snoopy is an HTTP client in PHP 269 $client = new Snoopy(); 270 $client->agent = MAGPIE_USER_AGENT; 271 $client->read_timeout = MAGPIE_FETCH_TIME_OUT; 272 $client->use_gzip = MAGPIE_USE_GZIP; 273 if (is_array($headers) ) { 274 $client->rawheaders = $headers; 275 } 276 277 @$client->fetch($url); 278 return $client; 279 280 } 281 282 /*=======================================================================*\ 283 Function: _response_to_rss 284 Purpose: parse an HTTP response object into an RSS object 285 Input: an HTTP response object (see Snoopy) 286 Output: parsed RSS object (see rss_parse) 279 287 \*=======================================================================*/ 280 288 function _response_to_rss ($resp) { 281 $rss = new MagpieRSS( $resp->results ); 282 283 // if RSS parsed successfully 284 if ( $rss and !$rss->ERROR) { 285 // find Etag, and Last-Modified 286 foreach($resp->headers as $h) { 287 // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1" 288 if (strpos($h, ": ")) { 289 list($field, $val) = explode(": ", $h, 2); 290 } 291 else { 292 $field = $h; 293 $val = ""; 294 } 295 296 if ( $field == 'ETag' ) { 297 $rss->etag = $val; 298 } 299 300 if ( $field == 'Last-Modified' ) { 301 $rss->last_modified = $val; 302 } 303 } 304 305 return $rss; 306 } // else construct error message 307 else { 308 $errormsg = "Failed to parse RSS file."; 309 310 if ($rss) { 311 $errormsg .= " (" . $rss->ERROR . ")"; 312 } 313 error($errormsg); 314 315 return false; 316 } // end if ($rss and !$rss->error) 317 } 318 319 /*=======================================================================*\ 320 Function: init 321 Purpose: setup constants with default values 322 check for user overrides 289 $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING ); 290 291 // if RSS parsed successfully 292 if ( $rss and !$rss->ERROR) { 293 294 // find Etag, and Last-Modified 295 foreach($resp->headers as $h) { 296 // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1" 297 if (strpos($h, ": ")) { 298 list($field, $val) = explode(": ", $h, 2); 299 } 300 else { 301 $field = $h; 302 $val = ""; 303 } 304 305 if ( $field == 'ETag' ) { 306 $rss->etag = $val; 307 } 308 309 if ( $field == 'Last-Modified' ) { 310 $rss->last_modified = $val; 311 } 312 } 313 314 return $rss; 315 } // else construct error message 316 else { 317 $errormsg = "Failed to parse RSS file."; 318 319 if ($rss) { 320 $errormsg .= " (" . $rss->ERROR . ")"; 321 } 322 error($errormsg); 323 324 return false; 325 } // end if ($rss and !$rss->error) 326 } 327 328 /*=======================================================================*\ 329 Function: init 330 Purpose: setup constants with default values 331 check for user overrides 323 332 \*=======================================================================*/ 324 333 function init () { 325 if ( defined('MAGPIE_INITALIZED') ) { 326 return; 327 } 328 else { 329 define('MAGPIE_INITALIZED', 1); 330 } 331 332 if ( !defined('MAGPIE_CACHE_ON') ) { 333 define('MAGPIE_CACHE_ON', 1); 334 } 335 336 if ( !defined('MAGPIE_CACHE_DIR') ) { 337 define('MAGPIE_CACHE_DIR', './cache'); 338 } 339 340 if ( !defined('MAGPIE_CACHE_AGE') ) { 341 define('MAGPIE_CACHE_AGE', 60*60); // one hour 342 } 343 344 if ( !defined('MAGPIE_CACHE_FRESH_ONLY') ) { 345 define('MAGPIE_CACHE_FRESH_ONLY', 0); 346 } 347 348 if ( !defined('MAGPIE_DEBUG') ) { 349 define('MAGPIE_DEBUG', 0); 350 } 351 352 if ( !defined('MAGPIE_USER_AGENT') ) { 353 $ua = 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net'; 354 355 if ( MAGPIE_CACHE_ON ) { 356 $ua = $ua . ')'; 357 } 358 else { 359 $ua = $ua . '; No cache)'; 360 } 361 362 define('MAGPIE_USER_AGENT', $ua); 363 } 364 365 if ( !defined('MAGPIE_FETCH_TIME_OUT') ) { 366 define('MAGPIE_FETCH_TIME_OUT', 5); // 5 second timeout 367 } 368 369 // use gzip encoding to fetch rss files if supported? 370 if ( !defined('MAGPIE_USE_GZIP') ) { 371 define('MAGPIE_USE_GZIP', true); 372 } 334 if ( defined('MAGPIE_INITALIZED') ) { 335 return; 336 } 337 else { 338 define('MAGPIE_INITALIZED', true); 339 } 340 341 if ( !defined('MAGPIE_CACHE_ON') ) { 342 define('MAGPIE_CACHE_ON', true); 343 } 344 345 if ( !defined('MAGPIE_CACHE_DIR') ) { 346 define('MAGPIE_CACHE_DIR', './cache'); 347 } 348 349 if ( !defined('MAGPIE_CACHE_AGE') ) { 350 define('MAGPIE_CACHE_AGE', 60*60); // one hour 351 } 352 353 if ( !defined('MAGPIE_CACHE_FRESH_ONLY') ) { 354 define('MAGPIE_CACHE_FRESH_ONLY', false); 355 } 356 357 if ( !defined('MAGPIE_OUTPUT_ENCODING') ) { 358 define('MAGPIE_OUTPUT_ENCODING', 'ISO-8859-1'); 359 } 360 361 if ( !defined('MAGPIE_INPUT_ENCODING') ) { 362 define('MAGPIE_INPUT_ENCODING', null); 363 } 364 365 if ( !defined('MAGPIE_DETECT_ENCODING') ) { 366 define('MAGPIE_DETECT_ENCODING', true); 367 } 368 369 if ( !defined('MAGPIE_DEBUG') ) { 370 define('MAGPIE_DEBUG', 0); 371 } 372 373 if ( !defined('MAGPIE_USER_AGENT') ) { 374 $ua = 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net'; 375 376 if ( MAGPIE_CACHE_ON ) { 377 $ua = $ua . ')'; 378 } 379 else { 380 $ua = $ua . '; No cache)'; 381 } 382 383 define('MAGPIE_USER_AGENT', $ua); 384 } 385 386 if ( !defined('MAGPIE_FETCH_TIME_OUT') ) { 387 define('MAGPIE_FETCH_TIME_OUT', 5); // 5 second timeout 388 } 389 390 // use gzip encoding to fetch rss files if supported? 391 if ( !defined('MAGPIE_USE_GZIP') ) { 392 define('MAGPIE_USE_GZIP', true); 393 } 373 394 } 374 395 … … 377 398 378 399 /*=======================================================================*\ 379 HTTP STATUS CODE PREDICATES380 These functions attempt to classify an HTTP status code381 based on RFC 2616 and RFC 2518.382 383 All of them take an HTTP status code as input, and return true or false384 385 All this code is adapted from LWP's HTTP::Status.386 \*=======================================================================*/ 387 388 389 /*=======================================================================*\ 390 Function:is_info391 Purpose:return true if Informational status code400 HTTP STATUS CODE PREDICATES 401 These functions attempt to classify an HTTP status code 402 based on RFC 2616 and RFC 2518. 403 404 All of them take an HTTP status code as input, and return true or false 405 406 All this code is adapted from LWP's HTTP::Status. 407 \*=======================================================================*/ 408 409 410 /*=======================================================================*\ 411 Function: is_info 412 Purpose: return true if Informational status code 392 413 \*=======================================================================*/ 393 414 function is_info ($sc) { 394 return $sc >= 100 && $sc < 200;395 } 396 397 /*=======================================================================*\ 398 Function:is_success399 Purpose:return true if Successful status code415 return $sc >= 100 && $sc < 200; 416 } 417 418 /*=======================================================================*\ 419 Function: is_success 420 Purpose: return true if Successful status code 400 421 \*=======================================================================*/ 401 422 function is_success ($sc) { 402 return $sc >= 200 && $sc < 300;403 } 404 405 /*=======================================================================*\ 406 Function:is_redirect407 Purpose:return true if Redirection status code423 return $sc >= 200 && $sc < 300; 424 } 425 426 /*=======================================================================*\ 427 Function: is_redirect 428 Purpose: return true if Redirection status code 408 429 \*=======================================================================*/ 409 430 function is_redirect ($sc) { 410 return $sc >= 300 && $sc < 400;411 } 412 413 /*=======================================================================*\ 414 Function:is_error415 Purpose:return true if Error status code431 return $sc >= 300 && $sc < 400; 432 } 433 434 /*=======================================================================*\ 435 Function: is_error 436 Purpose: return true if Error status code 416 437 \*=======================================================================*/ 417 438 function is_error ($sc) { 418 return $sc >= 400 && $sc < 600;419 } 420 421 /*=======================================================================*\ 422 Function:is_client_error423 Purpose:return true if Error status code, and its a client error439 return $sc >= 400 && $sc < 600; 440 } 441 442 /*=======================================================================*\ 443 Function: is_client_error 444 Purpose: return true if Error status code, and its a client error 424 445 \*=======================================================================*/ 425 446 function is_client_error ($sc) { 426 return $sc >= 400 && $sc < 500;427 } 428 429 /*=======================================================================*\ 430 Function:is_client_error431 Purpose:return true if Error status code, and its a server error447 return $sc >= 400 && $sc < 500; 448 } 449 450 /*=======================================================================*\ 451 Function: is_client_error 452 Purpose: return true if Error status code, and its a server error 432 453 \*=======================================================================*/ 433 454 function is_server_error ($sc) { 434 return $sc >= 500 && $sc < 600;455 return $sc >= 500 && $sc < 600; 435 456 } 436 457 -
trunk/includes/rss/rss_parse.inc.php
r1 r52 1 1 <?php 2 /* 3 * Project: MagpieRSS: a simple RSS integration tool 4 * File: rss_parse.inc - parse an RSS or Atom feed 5 * return as a simple object. 6 * 7 * Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3 8 * 9 * The lastest version of MagpieRSS can be obtained from: 10 * http://magpierss.sourceforge.net 11 * 12 * For questions, help, comments, discussion, etc., please join the 13 * Magpie mailing list: 14 * magpierss-general@lists.sourceforge.net 15 * 16 * Author: Kellan Elliott-McCrea <kellan@protest.net> 17 * Version: 0.6a 18 * License: GPL 19 * 20 * 21 * ABOUT MAGPIE's APPROACH TO PARSING: 22 * - Magpie is based on expat, an XML parser, and therefore will only parse 23 * valid XML files. This includes all properly constructed RSS or Atom. 24 * 25 * - Magpie is an inclusive parser. It will include any elements that 26 * it can turn into a key value pair in the parsed feed object it returns. 27 * 28 * - Magpie supports namespaces, and will return any elements found in a 29 * namespace in a sub-array, with the key point to that array being the 30 * namespace prefix. 31 * (e.g. if an item contains a <dc:date> element, then that date can 32 * be accessed at $item['dc']['date'] 33 * 34 * - Magpie supports nested elements by combining the names. If an item 35 * includes XML like: 36 * <author> 37 * <name>Kellan</name> 38 * </author> 39 * 40 * The name field is accessible at $item['author_name'] 41 * 42 * - Magpie makes no attempt validate a feed beyond insuring that it 43 * is valid XML. 44 * RSS validators are readily available on the web at: 45 * http://feeds.archive.org/validator/ 46 * http://www.ldodds.com/rss_validator/1.0/validator.html 47 * 48 * 49 * EXAMPLE PARSED RSS ITEM: 50 * 51 * Magpie tries to parse RSS into easy to use PHP datastructures. 52 * 53 * For example, Magpie on encountering (a rather complex) RSS 1.0 item entry: 54 * 55 * <item rdf:about="http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257"> 56 * <title>Weekly Peace Vigil</title> 57 * <link>http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257</link> 58 * <description>Wear a white ribbon</description> 59 * <dc:subject>Peace</dc:subject> 60 * <ev:startdate>2002-06-01T11:00:00</ev:startdate> 61 * <ev:location>Northampton, MA</ev:location> 62 * <ev:type>Protest</ev:type> 63 * </item> 64 * 65 * Would transform it into the following associative array, and push it 66 * onto the array $rss-items 67 * 68 * array( 69 * title => 'Weekly Peace Vigil', 70 * link => 'http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257', 71 * description => 'Wear a white ribbon', 72 * dc => array ( 73 * subject => 'Peace' 74 * ), 75 * ev => array ( 76 * startdate => '2002-06-01T11:00:00', 77 * enddate => '2002-06-01T12:00:00', 78 * type => 'Protest', 79 * location => 'Northampton, MA' 80 * ) 81 * ) 82 * 83 * 84 * 85 * A FEW NOTES ON PARSING Atom FEEDS 86 * 87 * Atom support is considered alpha. Atom elements will be often be available 88 * as their RSS equivalent, summary is available as description for example. 89 * 90 * Elements of mode=xml, as flattened into a single string, just as if they 91 * had been wrapped in a CDATA container. 92 * 93 * See: http://laughingmeme.org/archives/001676.html 94 * 95 */ 2 3 /** 4 * Project: MagpieRSS: a simple RSS integration tool 5 * File: rss_parse.inc - parse an RSS or Atom feed 6 * return as a simple object. 7 * 8 * Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3 9 * 10 * The lastest version of MagpieRSS can be obtained from: 11 * http://magpierss.sourceforge.net 12 * 13 * For questions, help, comments, discussion, etc., please join the 14 * Magpie mailing list: 15 * magpierss-general@lists.sourceforge.net 16 * 17 * @author Kellan Elliott-McCrea <kellan@protest.net> 18 * @version 0.7a 19 * @license GPL 20 * 21 */ 96 22 97 23 define('RSS', 'RSS'); 98 24 define('ATOM', 'Atom'); 99 25 100 26 require_once (MAGPIE_DIR . 'rss_utils.inc.php'); 27 28 /** 29 * Hybrid parser, and object, takes RSS as a string and returns a simple object. 30 * 31 * see: rss_fetch.inc for a simpler interface with integrated caching support 32 * 33 */ 101 34 class MagpieRSS { 102 /* 103 * Hybrid parser, and object. (probably a bad idea! :) 104 * 105 * Useage Example: 106 * 107 * $some_rss = "<?xml version="1.0"...... 108 * 109 * $rss = new MagpieRSS( $some_rss ); 110 * 111 * // print rss chanel title 112 * echo $rss->channel['title']; 113 * 114 * // print the title of each item 115 * foreach ($rss->items as $item ) { 116 * echo $item[title]; 117 * } 118 * 119 * see: rss_fetch.inc for a simpler interface 120 */ 121 122 var $parser; 123 124 var $current_item = array(); // item currently being parsed 125 var $items = array(); // collection of parsed items 126 var $channel = array(); // hash of channel fields 127 var $textinput = array(); 128 var $image = array(); 129 var $feed_type; 130 var $feed_version; 131 132 // parser variables 133 var $stack = array(); // parser stack 134 var $inchannel = false; 135 var $initem = false; 136 var $incontent = false; // if in Atom <content mode="xml"> field 137 var $intextinput = false; 138 var $inimage = false; 139 var $current_field = ''; 140 var $current_namespace = false; 141 142 var $ERROR = ""; 143 144 var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright'); 145 /*======================================================================*\ 146 Function: MagpieRSS 147 Purpose: Constructor, sets up XML parser,parses source, 148 and populates object.. 149 Input: String containing the RSS to be parsed 150 \*======================================================================*/ 151 function MagpieRSS ($source) { 152 153 # if PHP xml isn't compiled in, die 154 # 155 if (!function_exists('xml_parser_create')) { 156 $this->error( "Failed to load PHP's XML Extension. " . 157 "http://www.php.net/manual/en/ref.xml.php", 158 E_USER_ERROR ); 159 } 160 161 $parser = @xml_parser_create(); 162 163 if (!is_resource($parser)) 164 { 165 $this->error( "Failed to create an instance of PHP's XML parser. " . 166 "http://www.php.net/manual/en/ref.xml.php", 167 E_USER_ERROR ); 168 } 169 170 171 $this->parser = $parser; 172 173 # pass in parser, and a reference to this object 174 # setup handlers 175 # 176 xml_set_object( $this->parser, $this ); 177 xml_set_element_handler($this->parser, 178 'feed_start_element', 'feed_end_element' ); 179 180 xml_set_character_data_handler( $this->parser, 'feed_cdata' ); 181 182 $status = xml_parse( $this->parser, $source ); 183 184 if (! $status ) { 185 $errorcode = xml_get_error_code( $this->parser ); 186 if ( $errorcode != XML_ERROR_NONE ) { 187 $xml_error = xml_error_string( $errorcode ); 188 $error_line = xml_get_current_line_number($this->parser); 189 $error_col = xml_get_current_column_number($this->parser); 190 $errormsg = "$xml_error at line $error_line, column $error_col"; 191 192 $this->error( $errormsg ); 193 } 194 } 195 196 xml_parser_free( $this->parser ); 197 198 $this->normalize(); 35 var $parser; 36 37 var $current_item = array(); // item currently being parsed 38 var $items = array(); // collection of parsed items 39 var $channel = array(); // hash of channel fields 40 var $textinput = array(); 41 var $image = array(); 42 var $feed_type; 43 var $feed_version; 44 var $encoding = ''; // output encoding of parsed rss 45 46 var $_source_encoding = ''; // only set if we have to parse xml prolog 47 48 var $ERROR = ""; 49 var $WARNING = ""; 50 51 // define some constants 52 53 var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright'); 54 var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1'); 55 56 // parser variables, useless if you're not a parser, treat as private 57 var $stack = array(); // parser stack 58 var $inchannel = false; 59 var $initem = false; 60 var $incontent = false; // if in Atom <content mode="xml"> field 61 var $intextinput = false; 62 var $inimage = false; 63 var $current_namespace = false; 64 65 66 /** 67 * Set up XML parser, parse source, and return populated RSS object.. 68 * 69 * @param string $source string containing the RSS to be parsed 70 * 71 * NOTE: Probably a good idea to leave the encoding options alone unless 72 * you know what you're doing as PHP's character set support is 73 * a little weird. 74 * 75 * NOTE: A lot of this is unnecessary but harmless with PHP5 76 * 77 * 78 * @param string $output_encoding output the parsed RSS in this character 79 * set defaults to ISO-8859-1 as this is PHP's 80 * default. 81 * 82 * NOTE: might be changed to UTF-8 in future 83 * versions. 84 * 85 * @param string $input_encoding the character set of the incoming RSS source. 86 * Leave blank and Magpie will try to figure it 87 * out. 88 * 89 * 90 * @param bool $detect_encoding if false Magpie won't attempt to detect 91 * source encoding. (caveat emptor) 92 * 93 */ 94 function MagpieRSS ($source, $output_encoding='ISO-8859-1', 95 $input_encoding=null, $detect_encoding=true) 96 { 97 # if PHP xml isn't compiled in, die 98 # 99 if (!function_exists('xml_parser_create')) { 100 $this->error( "Failed to load PHP's XML Extension. " . 101 "http://www.php.net/manual/en/ref.xml.php", 102 E_USER_ERROR ); 103 } 104 105 list($parser, $source) = $this->create_parser($source, 106 $output_encoding, $input_encoding, $detect_encoding); 107 108 109 if (!is_resource($parser)) { 110 $this->error( "Failed to create an instance of PHP's XML parser. " . 111 "http://www.php.net/manual/en/ref.xml.php", 112 E_USER_ERROR ); 113 } 114 115 116 $this->parser = $parser; 117 118 # pass in parser, and a reference to this object 119 # setup handlers 120 # 121 xml_set_object( $this->parser, $this ); 122 xml_set_element_handler($this->parser, 123 'feed_start_element', 'feed_end_element' ); 124 125 xml_set_character_data_handler( $this->parser, 'feed_cdata' ); 126 127 $status = xml_parse( $this->parser, $source ); 128 129 if (! $status ) { 130 $errorcode = xml_get_error_code( $this->parser ); 131 if ( $errorcode != XML_ERROR_NONE ) { 132 $xml_error = xml_error_string( $errorcode ); 133 $error_line = xml_get_current_line_number($this->parser); 134 $error_col = xml_get_current_column_number($this->parser); 135 $errormsg = "$xml_error at line $error_line, column $error_col"; 136 137 $this->error( $errormsg ); 138 } 139 } 140 141 xml_parser_free( $this->parser ); 142 143 $this->normalize(); 144 } 145 146 function feed_start_element($p, $element, &$attrs) { 147 $el = $element = strtolower($element); 148 $attrs = array_change_key_case($attrs, CASE_LOWER); 149 150 // check for a namespace, and split if found 151 $ns = false; 152 if ( strpos( $element, ':' ) ) { 153 list($ns, $el) = split( ':', $element, 2); 154 } 155 if ( $ns and $ns != 'rdf' ) { 156 $this->current_namespace = $ns; 157 } 158 159 # if feed type isn't set, then this is first element of feed 160 # identify feed from root element 161 # 162 if (!isset($this->feed_type) ) { 163 if ( $el == 'rdf' ) { 164 $this->feed_type = RSS; 165 $this->feed_version = '1.0'; 166 } 167 elseif ( $el == 'rss' ) { 168 $this->feed_type = RSS; 169 $this->feed_version = $attrs['version']; 170 } 171 elseif ( $el == 'feed' ) { 172 $this->feed_type = ATOM; 173 $this->feed_version = $attrs['version']; 174 $this->inchannel = true; 175 } 176 return; 177 } 178 179 if ( $el == 'channel' ) 180 { 181 $this->inchannel = true; 182 } 183 elseif ($el == 'item' or $el == 'entry' ) 184 { 185 $this->initem = true; 186 if ( isset($attrs['rdf:about']) ) { 187 $this->current_item['about'] = $attrs['rdf:about']; 188 } 189 } 190 191 // if we're in the default namespace of an RSS feed, 192 // record textinput or image fields 193 elseif ( 194 $this->feed_type == RSS and 195 $this->current_namespace == '' and 196 $el == 'textinput' ) 197 { 198 $this->intextinput = true; 199 } 200 201 elseif ( 202 $this->feed_type == RSS and 203 $this->current_namespace == '' and 204 $el == 'image' ) 205 { 206 $this->inimage = true; 207 } 208 209 # handle atom content constructs 210 elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) ) 211 { 212 // avoid clashing w/ RSS mod_content 213 if ($el == 'content' ) { 214 $el = 'atom_content'; 215 } 216 217 $this->incontent = $el; 218 219 220 } 221 222 // if inside an Atom content construct (e.g. content or summary) field treat tags as text 223 elseif ($this->feed_type == ATOM and $this->incontent ) 224 { 225 // if tags are inlined, then flatten 226 $attrs_str = join(' ', 227 array_map('map_attrs', 228 array_keys($attrs), 229 array_values($attrs) ) ); 230 231 $this->append_content( "<$element $attrs_str>" ); 232 233 array_unshift( $this->stack, $el ); 234 } 235 236 // Atom support many links per containging element. 237 // Magpie treats link elements of type rel='alternate' 238 // as being equivalent to RSS's simple link element. 239 // 240 elseif ($this->feed_type == ATOM and $el == 'link' ) 241 { 242 if ( isset($attrs['rel']) and $attrs['rel'] == 'alternate' ) 243 { 244 $link_el = 'link'; 245 } 246 else { 247 $link_el = 'link_' . $attrs['rel']; 248 } 249 250 $this->append($link_el, $attrs['href']); 251 } 252 // set stack[0] to current element 253 else { 254 array_unshift($this->stack, $el); 255 } 256 } 257 258 259 260 function feed_cdata ($p, $text) { 261 if ($this->feed_type == ATOM and $this->incontent) 262 { 263 $this->append_content( $text ); 264 } 265 else { 266 $current_el = join('_', array_reverse($this->stack)); 267 $this->append($current_el, $text); 268 } 269 } 270 271 function feed_end_element ($p, $el) { 272 $el = strtolower($el); 273 274 if ( $el == 'item' or $el == 'entry' ) 275 { 276 $this->items[] = $this->current_item; 277 $this->current_item = array(); 278 $this->initem = false; 279 } 280 elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' ) 281 { 282 $this->intextinput = false; 283 } 284 elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' ) 285 { 286 $this->inimage = false; 287 } 288 elseif ($this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) ) 289 { 290 $this->incontent = false; 291 } 292 elseif ($el == 'channel' or $el == 'feed' ) 293 { 294 $this->inchannel = false; 295 } 296 elseif ($this->feed_type == ATOM and $this->incontent ) { 297 // balance tags properly 298 // note: i don't think this is actually neccessary 299 if ( $this->stack[0] == $el ) 300 { 301 $this->append_content("</$el>"); 302 } 303 else { 304 $this->append_content("<$el />"); 305 } 306 307 array_shift( $this->stack ); 308 } 309 else { 310 array_shift( $this->stack ); 311 } 312 313 $this->current_namespace = false; 314 } 315 316 function concat (&$str1, $str2="") { 317 if (!isset($str1) ) { 318 $str1=""; 319 } 320 $str1 .= $str2; 321 } 322 323 324 325 function append_content($text) { 326 if ( $this->initem ) { 327 $this->concat( $this->current_item[ $this->incontent ], $text ); 328 } 329 elseif ( $this->inchannel ) { 330 $this->concat( $this->channel[ $this->incontent ], $text ); 331 } 332 } 333 334 // smart append - field and namespace aware 335 function append($el, $text) { 336 if (!$el) { 337 return; 338 } 339 if ( $this->current_namespace ) 340 { 341 if ( $this->initem ) { 342 $this->concat( 343 $this->current_item[ $this->current_namespace ][ $el ], $text); 344 } 345 elseif ($this->inchannel) { 346 $this->concat( 347 $this->channel[ $this->current_namespace][ $el ], $text ); 348 } 349 elseif ($this->intextinput) { 350 $this->concat( 351 $this->textinput[ $this->current_namespace][ $el ], $text ); 352 } 353 elseif ($this->inimage) { 354 $this->concat( 355 $this->image[ $this->current_namespace ][ $el ], $text ); 356 } 357 } 358 else { 359 if ( $this->initem ) { 360 $this->concat( 361 $this->current_item[ $el ], $text); 362 } 363 elseif ($this->intextinput) { 364 $this->concat( 365 $this->textinput[ $el ], $text ); 366 } 367 elseif ($this->inimage) { 368 $this->concat( 369 $this->image[ $el ], $text ); 370 } 371 elseif ($this->inchannel) { 372 $this->concat( 373 $this->channel[ $el ], $text ); 374 } 375 376 } 377 } 378 379 function normalize () { 380 // if atom populate rss fields 381 if ( $this->is_atom() ) { 382 $this->channel['description'] = $this->channel['tagline']; 383 for ( $i = 0; $i < count($this->items); $i++) { 384 $item = $this->items[$i]; 385 if ( isset($item['summary']) ) 386 $item['description'] = $item['summary']; 387 if ( isset($item['atom_content'])) 388 $item['content']['encoded'] = $item['atom_content']; 389 390 $atom_date = (isset($item['issued']) ) ? $item['issued'] : $item['modified']; 391 if ( $atom_date ) { 392 $epoch = @parse_w3cdtf($atom_date); 393 if ($epoch and $epoch > 0) { 394 $item['date_timestamp'] = $epoch; 395 } 396 } 397 398 $this->items[$i] = $item; 399 } 400 } 401 elseif ( $this->is_rss() ) { 402 $this->channel['tagline'] = $this->channel['description']; 403 for ( $i = 0; $i < count($this->items); $i++) { 404 $item = $this->items[$i]; 405 if ( isset($item['description'])) 406 $item['summary'] = $item['description']; 407 if ( isset($item['content']['encoded'] ) ) 408 $item['atom_content'] = $item['content']['encoded']; 409 410 if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) { 411 $epoch = @parse_w3cdtf($item['dc']['date']); 412 if ($epoch and $epoch > 0) { 413 $item['date_timestamp'] = $epoch; 414 } 415 } 416 elseif ( isset($item['pubdate']) ) { 417 $epoch = @strtotime($item['pubdate']); 418 if ($epoch > 0) { 419 $item['date_timestamp'] = $epoch; 420 } 421 } 422 423 $this->items[$i] = $item; 424 } 425 } 426 } 427 428 429 function is_rss () { 430 if ( $this->feed_type == RSS ) { 431 return $this->feed_version; 432 } 433 else { 434 return false; 435 } 436 } 437 438 function is_atom() { 439 if ( $this->feed_type == ATOM ) { 440 return $this->feed_version; 441 } 442 else { 443 return false; 444 } 445 } 446 447 /** 448 * return XML parser, and possibly re-encoded source 449 * 450 */ 451 function create_parser($source, $out_enc, $in_enc, $detect) { 452 if ( substr(phpversion(),0,1) == 5) { 453 $parser = $this->php5_create_parser($in_enc, $detect); 454 } 455 else { 456 list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect); 457 } 458 if ($out_enc) { 459 $this->encoding = $out_enc; 460 xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc); 461 } 462 463 return array($parser, $source); 464 } 465 466 /** 467 * Instantiate an XML parser under PHP5 468 * 469 * PHP5 will do a fine job of detecting input encoding 470 * if passed an empty string as the encoding. 471 * 472 * All hail libxml2! 473 * 474 */ 475 function php5_create_parser($in_enc, $detect) { 476 // by default php5 does a fine job of detecting input encodings 477 if(!$detect && $in_enc) { 478 return xml_parser_create($in_enc); 479 } 480 else { 481 return xml_parser_create(''); 482 } 483 } 484 485 /** 486 * Instaniate an XML parser under PHP4 487 * 488 * Unfortunately PHP4's support for character encodings 489 * and especially XML and character encodings sucks. As 490 * long as the documents you parse only contain characters 491 * from the ISO-8859-1 character set (a superset of ASCII, 492 * and a subset of UTF-8) you're fine. However once you 493 * step out of that comfy little world things get mad, bad, 494 * and dangerous to know. 495 * 496 * The following code is based on SJM's work with FoF 497 * @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss 498 * 499 */ 500 function php4_create_parser($source, $in_enc, $detect) { 501 if ( !$detect ) { 502 return array(xml_parser_create($in_enc), $source); 503 } 504 505 if (!$in_enc) { 506 if (preg_match('/<?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $source, $m)) { 507 $in_enc = strtoupper($m[1]); 508 $this->source_encoding = $in_enc; 509 } 510 else { 511 $in_enc = 'UTF-8'; 512 } 513 } 514 515 if ($this->known_encoding($in_enc)) { 516 return array(xml_parser_create($in_enc), $source); 517 } 518 519 // the dectected encoding is not one of the simple encodings PHP knows 520 521 // attempt to use the iconv extension to 522 // cast the XML to a known encoding 523 // @see http://php.net/iconv 524 525 if (function_exists('iconv')) { 526 $encoded_source = iconv($in_enc,'UTF-8', $source); 527 if ($encoded_source) { 528 return array(xml_parser_create('UTF-8'), $encoded_source); 529 } 530 } 531 532 // iconv didn't work, try mb_convert_encoding 533 // @see http://php.net/mbstring 534 if(function_exists('mb_convert_encoding')) { 535 $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc ); 536 if ($encoded_source) { 537 return array(xml_parser_create('UTF-8'), $encoded_source); 538 } 539 } 540 541 // else 542 $this->error("Feed is in an unsupported character encoding. ($in_enc) " . 543 "You may see strange artifacts, and mangled characters.", 544 E_USER_NOTICE); 545 546 return array(xml_parser_create(), $source); 547 } 548 549 function known_encoding($enc) { 550 $enc = strtoupper($enc); 551 if ( in_array($enc, $this->_KNOWN_ENCODINGS) ) { 552 return $enc; 553 } 554 else { 555 return false; 556 } 557 } 558 559 function error ($errormsg, $lvl=E_USER_WARNING) { 560 // append PHP's error message if track_errors enabled 561 if ( isset($php_errormsg) ) { 562 $errormsg .= " ($php_errormsg)"; 563 } 564 if ( MAGPIE_DEBUG ) { 565 trigger_error( $errormsg, $lvl); 566 } 567 else { 568 error_log( $errormsg, 0); 569 } 570 571 $notices = E_USER_NOTICE|E_NOTICE; 572 if ( $lvl&$notices ) { 573 $this->WARNING = $errormsg; 574 } else { 575 $this->ERROR = $errormsg; 576 } 577 } 578 579 580 } // end class RSS 581 582 function map_attrs($k, $v) { 583 return "$k=\"$v\""; 584 } 585 586 // patch to support medieval versions of PHP4.1.x, 587 // courtesy, Ryan Currie, ryan@digibliss.com 588 589 if (!function_exists('array_change_key_case')) { 590 define("CASE_UPPER",1); 591 define("CASE_LOWER",0); 592 593 594 function array_change_key_case($array,$case=CASE_LOWER) { 595 if ($case=CASE_LOWER) $cmd=strtolower; 596 elseif ($case=CASE_UPPER) $cmd=strtoupper; 597 foreach($array as $key=>$value) { 598 $output[$cmd($key)]=$value; 599 } 600 return $output; 199 601 } 200 201 function feed_start_element($p, $element, &$attrs) { 202 $el = $element = strtolower($element); 203 $attrs = array_change_key_case($attrs, CASE_LOWER); 204 205 // check for a namespace, and split if found 206 $ns = false; 207 if ( strpos( $element, ':' ) ) { 208 list($ns, $el) = split( ':', $element, 2); 209 } 210 if ( $ns and $ns != 'rdf' ) { 211 $this->current_namespace = $ns; 212 } 213 214 # if feed type isn't set, then this is first element of feed 215 # identify feed from root element 216 # 217 if (!isset($this->feed_type) ) { 218 if ( $el == 'rdf' ) { 219 $this->feed_type = RSS; 220 $this->feed_version = '1.0'; 221 } 222 elseif ( $el == 'rss' ) { 223 $this->feed_type = RSS; 224 $this->feed_version = $attrs['version']; 225 } 226 elseif ( $el == 'feed' ) { 227 $this->feed_type = ATOM; 228 $this->feed_version = $attrs['version']; 229 $this->inchannel = true; 230 } 231 return; 232 } 233 234 if ( $el == 'channel' ) 235 { 236 $this->inchannel = true; 237 } 238 elseif ($el == 'item' or $el == 'entry' ) 239 { 240 $this->initem = true; 241 if ( isset($attrs['rdf:about']) ) { 242 $this->current_item['about'] = $attrs['rdf:about']; 243 } 244 } 245 246 // if we're in the default namespace of an RSS feed, 247 // record textinput or image fields 248 elseif ( 249 $this->feed_type == RSS and 250 $this->current_namespace == '' and 251 $el == 'textinput' ) 252 { 253 $this->intextinput = true; 254 } 255 256 elseif ( 257 $this->feed_type == RSS and 258 $this->current_namespace == '' and 259 $el == 'image' ) 260 { 261 $this->inimage = true; 262 } 263 264 # handle atom content constructs 265 elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) ) 266 { 267 // avoid clashing w/ RSS mod_content 268 if ($el == 'content' ) { 269 $el = 'atom_content'; 270 } 271 272 $this->incontent = $el; 273 274 275 } 276 277 // if inside an Atom content construct (e.g. content or summary) field treat tags as text 278 elseif ($this->feed_type == ATOM and $this->incontent ) 279 { 280 // if tags are inlined, then flatten 281 $attrs_str = join(' ', 282 array_map('map_attrs', 283 array_keys($attrs), 284 array_values($attrs) ) ); 285 286 $this->append_content( "<$element $attrs_str>" ); 287 288 array_unshift( $this->stack, $el ); 289 } 290 291 // Atom support many links per containging element. 292 // Magpie treats link elements of type rel='alternate' 293 // as being equivalent to RSS's simple link element. 294 // 295 elseif ($this->feed_type == ATOM and $el == 'link' ) 296 { 297 if ( isset($attrs['rel']) and $attrs['rel'] == 'alternate' ) 298 { 299 $link_el = 'link'; 300 } 301 else { 302 $link_el = 'link_' . $attrs['rel']; 303 } 304 305 $this->append($link_el, $attrs['href']); 306 } 307 // set stack[0] to current element 308 else { 309 array_unshift($this->stack, $el); 310 } 311 } 312 313 314 315 function feed_cdata ($p, $text) { 316 317 if ($this->feed_type == ATOM and $this->incontent) 318 { 319 $this->append_content( $text ); 320 } 321 else { 322 $current_el = join('_', array_reverse($this->stack)); 323 $this->append($current_el, $text); 324 } 325 } 326 327 function feed_end_element ($p, $el) { 328 $el = strtolower($el); 329 330 if ( $el == 'item' or $el == 'entry' ) 331 { 332 $this->items[] = $this->current_item; 333 $this->current_item = array(); 334 $this->initem = false; 335 } 336 elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' ) 337 { 338 $this->intextinput = false; 339 } 340 elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' ) 341 { 342 $this->inimage = false; 343 } 344 elseif ($this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) ) 345 { 346 $this->incontent = false; 347 } 348 elseif ($el == 'channel' or $el == 'feed' ) 349 { 350 $this->inchannel = false; 351 } 352 elseif ($this->feed_type == ATOM and $this->incontent ) { 353 // balance tags properly 354 // note: i don't think this is actually neccessary 355 if ( $this->stack[0] == $el ) 356 { 357 $this->append_content("</$el>"); 358 } 359 else { 360 $this->append_content("<$el />"); 361 } 362 363 array_shift( $this->stack ); 364 } 365 else { 366 array_shift( $this->stack ); 367 } 368 369 $this->current_namespace = false; 370 } 371 372 function concat (&$str1, $str2="") { 373 if (!isset($str1) ) { 374 $str1=""; 375 } 376 $str1 .= $str2; 377 } 378 379 380 381 function append_content($text) { 382 if ( $this->initem ) { 383 $this->concat( $this->current_item[ $this->incontent ], $text ); 384 } 385 elseif ( $this->inchannel ) { 386 $this->concat( $this->channel[ $this->incontent ], $text ); 387 } 388 } 389 390 // smart append - field and namespace aware 391 function append($el, $text) { 392 if (!$el) { 393 return; 394 } 395 if ( $this->current_namespace ) 396 { 397 if ( $this->initem ) { 398 $this->concat( 399 $this->current_item[ $this->current_namespace ][ $el ], $text); 400 } 401 elseif ($this->inchannel) { 402 $this->concat( 403 $this->channel[ $this->current_namespace][ $el ], $text ); 404 } 405 elseif ($this->intextinput) { 406 $this->concat( 407 $this->textinput[ $this->current_namespace][ $el ], $text ); 408 } 409 elseif ($this->inimage) { 410 $this->concat( 411 $this->image[ $this->current_namespace ][ $el ], $text ); 412 } 413 } 414 else { 415 if ( $this->initem ) { 416 $this->concat( 417 $this->current_item[ $el ], $text); 418 } 419 elseif ($this->intextinput) { 420 $this->concat( 421 $this->textinput[ $el ], $text ); 422 } 423 elseif ($this->inimage) { 424 $this->concat( 425 $this->image[ $el ], $text ); 426 } 427 elseif ($this->inchannel) { 428 $this->concat( 429 $this->channel[ $el ], $text ); 430 } 431 432 } 433 } 434 435 function normalize () { 436 // if atom populate rss fields 437 if ( $this->is_atom() ) { 438 $this->channel['descripton'] = $this->channel['tagline']; 439 for ( $i = 0; $i < count($this->items); $i++) { 440 $item = $this->items[$i]; 441 if ( isset($item['summary']) ) 442 $item['description'] = $item['summary']; 443 if ( isset($item['atom_content'])) 444 $item['content']['encoded'] = $item['atom_content']; 445 446 $this->items[$i] = $item; 447 } 448 } 449 elseif ( $this->is_rss() ) { 450 $this->channel['tagline'] = $this->channel['description']; 451 for ( $i = 0; $i < count($this->items); $i++) { 452 $item = $this->items[$i]; 453 if ( isset($item['description'])) 454 $item['summary'] = $item['description']; 455 if ( isset($item['content']['encoded'] ) ) 456 $item['atom_content'] = $item['content']['encoded']; 457 458 $this->items[$i] = $item; 459 } 460 } 461 } 462 463 function error ($errormsg, $lvl=E_USER_WARNING) { 464 // append PHP's error message if track_errors enabled 465 if ( $php_errormsg ) { 466 $errormsg .= " ($php_errormsg)"; 467 } 468 $this->ERROR = $errormsg; 469 if ( MAGPIE_DEBUG ) { 470 trigger_error( $errormsg, $lvl); 471 } 472 else { 473 error_log( $errormsg, 0); 474 } 475 } 476 477 function is_rss () { 478 if ( $this->feed_type == RSS ) { 479 return $this->feed_version; 480 } 481 else { 482 return false; 483 } 484 } 485 486 function is_atom() { 487 if ( $this->feed_type == ATOM ) { 488 return $this->feed_version; 489 } 490 else { 491 return false; 492 } 493 } 494 495 /*======================================================================*\ 496 EVERYTHING BELOW HERE IS FOR DEBUGGING PURPOSES 497 \*======================================================================*/ 498 function show_list () { 499 echo "<ol>\n"; 500 foreach ($this->items as $item) { 501 echo "<li>", $this->show_item( $item ); 502 } 503 echo "</ol>"; 504 } 505 506 function show_channel () { 507 echo "channel:<br>"; 508 echo "<ul>"; 509 while ( list($key, $value) = each( $this->channel ) ) { 510 echo "<li> $key: $value"; 511 } 512 echo "</ul>"; 513 } 514 515 function show_item ($item) { 516 echo "item: $item[title]"; 517 echo "<ul>"; 518 while ( list($key, $value) = each($item) ) { 519 if ( is_array($value) ) { 520 echo "<br><b>$key</b>"; 521 echo "<ul>"; 522 while ( list( $ns_key, $ns_value) = each( $value ) ) { 523 echo "<li>$ns_key: $ns_value"; 524 } 525 echo "</ul>"; 526 } 527 else { 528 echo "<li> $key: $value"; 529 } 530 } 531 echo "</ul>"; 532 } 533 534 /*======================================================================*\ 535 END DEBUGGING FUNCTIONS 536 \*======================================================================*/ 537 538 539 540 } # end class RSS 541 542 function map_attrs($k, $v) { 543 return "$k=\"$v\""; 602 544 603 } 545 604 546 547 605 ?> -
trunk/includes/rss/rss_utils.inc.php
r1 r52 4 4 * File: rss_utils.inc, utility methods for working with RSS 5 5 * Author: Kellan Elliott-McCrea <kellan@protest.net> 6 * Version: 0.517 * License: GPL6 * Version: 0.51 7 * License: GPL 8 8 * 9 9 * The lastest version of MagpieRSS can be obtained from: … … 20 20 Purpose: parse a W3CDTF date into unix epoch 21 21 22 NOTE: http://www.w3.org/TR/NOTE-datetime22 NOTE: http://www.w3.org/TR/NOTE-datetime 23 23 \*======================================================================*/ 24 24 25 25 function parse_w3cdtf ( $date_str ) { 26 27 # regex to match wc3dtf28 $pat = "/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/";29 30 if ( preg_match( $pat, $date_str, $match ) ) {31 list( $year, $month, $day, $hours, $minutes, $seconds) =32 array( $match[1], $match[2], $match[3], $match[4], $match[5], $match[6]);33 34 # calc epoch for current date assuming GMT35 $epoch = gmmktime( $hours, $minutes, $seconds, $month, $day, $year);36 37 $offset = 0;38 if ( $match[10] == 'Z' ) {39 # zulu time, aka GMT40 }41 else {42 list( $tz_mod, $tz_hour, $tz_min ) =43 array( $match[8], $match[9], $match[10]);44 45 # zero out the variables46 if ( ! $tz_hour ) { $tz_hour = 0; }47 if ( ! $tz_min ) { $tz_min = 0; }48 49 $offset_secs = (($tz_hour*60)+$tz_min)*60;50 51 # is timezone ahead of GMT? then subtract offset52 #53 if ( $tz_mod == '+' ) {54 $offset_secs = $offset_secs * -1;55 }56 57 $offset = $offset_secs; 58 }59 $epoch = $epoch + $offset;60 return $epoch;61 }62 else {63 return -1;64 }26 27 # regex to match wc3dtf 28 $pat = "/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/"; 29 30 if ( preg_match( $pat, $date_str, $match ) ) { 31 list( $year, $month, $day, $hours, $minutes, $seconds) = 32 array( $match[1], $match[2], $match[3], $match[4], $match[5], $match[6]); 33 34 # calc epoch for current date assuming GMT 35 $epoch = gmmktime( $hours, $minutes, $seconds, $month, $day, $year); 36 37 $offset = 0; 38 if ( $match[10] == 'Z' ) { 39 # zulu time, aka GMT 40 } 41 else { 42 list( $tz_mod, $tz_hour, $tz_min ) = 43 array( $match[8], $match[9], $match[10]); 44 45 # zero out the variables 46 if ( ! $tz_hour ) { $tz_hour = 0; } 47 if ( ! $tz_min ) { $tz_min = 0; } 48 49 $offset_secs = (($tz_hour*60)+$tz_min)*60; 50 51 # is timezone ahead of GMT? then subtract offset 52 # 53 if ( $tz_mod == '+' ) { 54 $offset_secs = $offset_secs * -1; 55 } 56 57 $offset = $offset_secs; 58 } 59 $epoch = $epoch + $offset; 60 return $epoch; 61 } 62 else { 63 return -1; 64 } 65 65 } 66 66
