From e9c9b1fbf2d314f36115d3fd530ad56bba9e06fe Mon Sep 17 00:00:00 2001 From: ryan Date: Sun, 16 Dec 2007 21:34:48 +0000 Subject: [PATCH] Import file attachments. Props tellyworth. fixes #5466 git-svn-id: http://svn.automattic.com/wordpress/trunk@6390 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- wp-admin/import/wordpress.php | 109 +++++++++++++++++++++++++++++++--- wp-admin/includes/file.php | 21 +------ wp-includes/functions.php | 98 +++++++++++++++++++++++------- 3 files changed, 180 insertions(+), 48 deletions(-) diff --git a/wp-admin/import/wordpress.php b/wp-admin/import/wordpress.php index 64fb9d883..5b95ace76 100644 --- a/wp-admin/import/wordpress.php +++ b/wp-admin/import/wordpress.php @@ -10,7 +10,8 @@ class WP_Import { var $newauthornames = array (); var $allauthornames = array (); var $j = -1; - var $another_pass = false; + var $fetch_attachments = false; + var $url_remap = array (); function header() { echo '
'; @@ -189,10 +190,17 @@ class WP_Import { $this->users_form($j); echo ''; } +?> + +

+

+ + +

+'.'
'; echo ''; - echo ''; } @@ -295,7 +303,7 @@ class WP_Import { $post_content = preg_replace('|<(/?[A-Z]+)|e', "'<' . strtolower('$1')", $post_content); $post_content = str_replace('
', '
', $post_content); $post_content = str_replace('
', '
', $post_content); - + preg_match_all('|(.*?)|is', $post, $tags); $tags = $tags[1]; @@ -333,12 +341,24 @@ class WP_Import { } echo '
  • '; - printf(__('Importing post %s...'), stripslashes($post_title)); $post_author = $this->checkauthor($post_author); //just so that if a post already exists, new users are not created by checkauthor $postdata = compact('post_author', 'post_date', 'post_date_gmt', 'post_content', 'post_title', 'post_excerpt', 'post_status', 'post_name', 'comment_status', 'ping_status', 'post_modified', 'post_modified_gmt', 'guid', 'post_parent', 'menu_order', 'post_type'); - $comment_post_ID = $post_id = wp_insert_post($postdata); + if ($post_type == 'attachment') { + $remote_url = $this->get_tag( $post, 'wp:attachment_url' ); + if ( !$remote_url ) + $remote_url = $guid; + + $comment_post_ID = $post_id = $this->process_attachment($postdata, $remote_url); + if ( !$post_id or is_wp_error($post_id) ) + return $post_id; + } + else { + printf(__('Importing post %s...'), stripslashes($post_title)); + $comment_post_ID = $post_id = wp_insert_post($postdata); + } + if ( is_wp_error( $post_id ) ) return $post_id; @@ -420,6 +440,79 @@ class WP_Import { $value = stripslashes($value); // add_post_meta() will escape. add_post_meta( $post_id, $key, $value ); } } + + print "
  • \n"; + } + + function process_attachment($postdata, $remote_url) { + if ($this->fetch_attachments and $remote_url) { + printf( __('Importing attachment %s... '), htmlspecialchars($remote_url) ); + $upload = $this->fetch_remote_file($postdata, $remote_url); + if ( is_wp_error($upload) ) { + printf( __('Remote file error: %s'), htmlspecialchars($upload->get_error_message()) ); + return $upload; + } + else { + print '('.size_format(filesize($upload['file'])).')'; + } + + $postdata['guid'] = $upload['url']; + + // as per wp-admin/includes/upload.php + $post_id = wp_insert_attachment($postdata, $upload['file']); + wp_update_attachment_metadata( $post_id, wp_generate_attachment_metadata( $post_id, $upload['file'] ) ); + return $post_id; + } + else { + printf( __('Skipping attachment %s'), htmlspecialchars($remote_url) ); + } + } + + function fetch_remote_file($post, $url) { + $upload = wp_upload_dir($post['post_date']); + + // extract the file name and extension from the url + $file_name = basename($url); + + // get placeholder file in the upload dir with a unique sanitized filename + $upload = wp_upload_bits( $file_name, 0, '', $post['post_date']); + if ( $upload['error'] ) { + echo $upload['error']; + return new WP_Error( 'upload_dir_error', $upload['error'] ); + } + + // fetch the remote url and write it to the placeholder file + $headers = wp_get_http($url, $upload['file']); + + // make sure the fetch was successful + if ( $headers['response'] != '200' ) + return new WP_Error( 'import_file_error', __(sprintf('Remote file returned error response %d', intval($headers['response']))) ); + elseif ( isset($headers['content-length']) && filesize($upload['file']) != $headers['content-length'] ) + return new WP_Error( 'import_file_error', __('Remote file is incorrect size') ); + + // keep track of the old and new urls so we can substitute them later + $this->url_remap[$url] = $upload['url']; + // if the remote url is redirected somewhere else, keep track of the destination too + if ( $headers['x-final-location'] != $url ) + $this->url_remap[$headers['x-final-location']] = $upload['url']; + + return $upload; + + } + + // update url references in post bodies to point to the new local files + function backfill_attachment_urls() { + + // make sure we do the longest urls first, in case one is a substring of another + function cmpr_strlen($a, $b) { + return strlen($b) - strlen($a); + } + uksort($this->url_remap, 'cmpr_strlen'); + + global $wpdb; + foreach ($this->url_remap as $from_url => $to_url) { + $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_content = REPLACE(post_content, '%s', '%s')", $from_url, $to_url) ); + } } // update the post_parent of orphans now that we know the local id's of all parents @@ -435,8 +528,9 @@ class WP_Import { } } - function import($id) { + function import($id, $fetch_attachments = false) { $this->id = (int) $id; + $this->fetch_attachments = (bool) $fetch_attachments; $file = get_attached_file($this->id); $this->import_file($file); @@ -452,6 +546,7 @@ class WP_Import { $this->process_tags(); $result = $this->process_posts(); $this->backfill_parents(); + $this->backfill_attachment_urls(); wp_defer_term_counting(false); if ( is_wp_error( $result ) ) return $result; @@ -487,7 +582,7 @@ class WP_Import { break; case 2: check_admin_referer('import-wordpress'); - $result = $this->import( $_GET['id'] ); + $result = $this->import( $_GET['id'], $_POST['attachments'] ); if ( is_wp_error( $result ) ) echo $result->get_error_message(); break; diff --git a/wp-admin/includes/file.php b/wp-admin/includes/file.php index 7f79eb41e..2a2908b89 100644 --- a/wp-admin/includes/file.php +++ b/wp-admin/includes/file.php @@ -147,26 +147,7 @@ function wp_handle_upload( &$file, $overrides = false ) { if ( ! ( ( $uploads = wp_upload_dir() ) && false === $uploads['error'] ) ) return $upload_error_handler( $file, $uploads['error'] ); - // Increment the file number until we have a unique file to save in $dir. Use $override['unique_filename_callback'] if supplied. - if ( isset( $unique_filename_callback ) && function_exists( $unique_filename_callback ) ) { - $filename = $unique_filename_callback( $uploads['path'], $file['name'] ); - } else { - $number = ''; - $filename = str_replace( '#', '_', $file['name'] ); - $filename = str_replace( array( '\\', "'" ), '', $filename ); - if ( empty( $ext) ) - $ext = ''; - else - $ext = ".$ext"; - while ( file_exists( $uploads['path'] . "/$filename" ) ) { - if ( '' == "$number$ext" ) - $filename = $filename . ++$number . $ext; - else - $filename = str_replace( "$number$ext", ++$number . $ext, $filename ); - } - $filename = str_replace( $ext, '', $filename ); - $filename = sanitize_title_with_dashes( $filename ) . $ext; - } + $filename = wp_unique_filename( $uploads['path'], $file['name'], $ext, $unique_filename_callback ); // Move the file to the uploads dir $new_file = $uploads['path'] . "/$filename"; diff --git a/wp-includes/functions.php b/wp-includes/functions.php index ee42b0608..d46363c53 100644 --- a/wp-includes/functions.php +++ b/wp-includes/functions.php @@ -531,8 +531,10 @@ function do_enclose( $content, $post_ID ) { } } - -function wp_get_http_headers( $url, $red = 1 ) { +// perform a HTTP HEAD or GET request +// if $file_path is a writable filename, this will do a GET request and write the file to that path +// returns a list of HTTP headers +function wp_get_http( $url, $file_path = false, $red = 1 ) { global $wp_version; @set_time_limit( 60 ); @@ -545,7 +547,12 @@ function wp_get_http_headers( $url, $red = 1 ) { if ( !isset( $parts['port'] ) ) $parts['port'] = 80; - $head = "HEAD $file HTTP/1.1\r\nHOST: $host\r\nUser-Agent: WordPress/" . $wp_version . "\r\n\r\n"; + if ( $file_path ) + $request_type = 'GET'; + else + $request_type = 'HEAD'; + + $head = "$request_type $file HTTP/1.1\r\nHOST: $host\r\nUser-Agent: WordPress/" . $wp_version . "\r\n\r\n"; $fp = @fsockopen( $host, $parts['port'], $err_num, $err_msg, 3 ); if ( !$fp ) @@ -555,7 +562,6 @@ function wp_get_http_headers( $url, $red = 1 ) { fputs( $fp, $head ); while ( !feof( $fp ) && strpos( $response, "\r\n\r\n" ) == false ) $response .= fgets( $fp, 2048 ); - fclose( $fp ); preg_match_all( '/(.*?): (.*)\r/', $response, $matches ); $count = count( $matches[1] ); for ( $i = 0; $i < $count; $i++ ) { @@ -567,12 +573,42 @@ function wp_get_http_headers( $url, $red = 1 ) { $headers['response'] = $return[1]; // HTTP response code eg 204, 200, 404 $code = $headers['response']; - if ( ( '302' == $code || '301' == $code ) && isset( $headers['location'] ) ) - return wp_get_http_headers( $headers['location'], ++$red ); + if ( ( '302' == $code || '301' == $code ) && isset( $headers['location'] ) ) { + fclose($fp); + return wp_get_http_headers( $headers['location'], $get, ++$red ); + } + + // make a note of the final location, so the caller can tell if we were redirected or not + $headers['x-final-location'] = $url; + // HEAD request only + if ( !$file_path ) { + fclose($fp); + return $headers; + } + + // GET request - fetch and write it to the supplied filename + $content_length = $headers['content-length']; + $got_bytes = 0; + $out_fp = fopen($file_path, 'w'); + while ( !feof($fp) ) { + $buf = fread( $fp, 4096 ); + fwrite( $out_fp, $buf ); + $got_bytes += strlen($buf); + // don't read past the content-length + if ($content_length and $got_bytes >= $content_length) + break; + } + + fclose($out_fp); + fclose($fp); return $headers; } +function wp_get_http_headers( $url ) { + return wp_get_http( $url, false ); +} + function is_new_day() { global $day, $previousday; @@ -992,7 +1028,7 @@ function wp_mkdir_p( $target ) { // Returns an array containing the current upload directory's path and url, or an error message. -function wp_upload_dir() { +function wp_upload_dir( $time = NULL ) { $siteurl = get_option( 'siteurl' ); //prepend ABSPATH to $dir and $siteurl to $url if they're not already there $path = str_replace( ABSPATH, '', trim( get_option( 'upload_path' ) ) ); @@ -1009,7 +1045,8 @@ function wp_upload_dir() { if ( get_option( 'uploads_use_yearmonth_folders' ) ) { // Generate the yearly and monthly dirs - $time = current_time( 'mysql' ); + if ( !$time ) + $time = current_time( 'mysql' ); $y = substr( $time, 0, 4 ); $m = substr( $time, 5, 2 ); $dir = $dir . "/$y/$m"; @@ -1026,7 +1063,35 @@ function wp_upload_dir() { return apply_filters( 'upload_dir', $uploads ); } -function wp_upload_bits( $name, $deprecated, $bits ) { +// return a filename that is sanitized and unique for the given directory +function wp_unique_filename( $dir, $name, $ext, $unique_filename_callback = NULL ) { + + // Increment the file number until we have a unique file to save in $dir. Use $override['unique_filename_callback'] if supplied. + if ( $unique_filename_callback && function_exists( $unique_filename_callback ) ) { + $filename = $unique_filename_callback( $dir, $name ); + } else { + $number = ''; + $filename = str_replace( '#', '_', $name ); + $filename = str_replace( array( '\\', "'" ), '', $filename ); + if ( empty( $ext) ) + $ext = ''; + else + $ext = ".$ext"; + $filename = $filename . $ext; + while ( file_exists( $dir . "/$filename" ) ) { + if ( '' == "$number$ext" ) + $filename = $filename . ++$number . $ext; + else + $filename = str_replace( "$number$ext", ++$number . $ext, $filename ); + } + $filename = str_replace( $ext, '', $filename ); + $filename = sanitize_title_with_dashes( $filename ) . $ext; + } + + return $filename; +} + +function wp_upload_bits( $name, $deprecated, $bits, $time = NULL ) { if ( empty( $name ) ) return array( 'error' => __( "Empty filename" ) ); @@ -1034,25 +1099,16 @@ function wp_upload_bits( $name, $deprecated, $bits ) { if ( !$wp_filetype['ext'] ) return array( 'error' => __( "Invalid file type" ) ); - $upload = wp_upload_dir(); + $upload = wp_upload_dir( $time ); if ( $upload['error'] !== false ) return $upload; - $number = ''; $filename = $name; $path_parts = pathinfo( $filename ); $ext = $path_parts['extension']; - if ( empty( $ext ) ) - $ext = ''; - else - $ext = ".$ext"; - while ( file_exists( $upload['path'] . "/$filename" ) ) { - if ( '' == "$number$ext" ) - $filename = $filename . ++$number . $ext; - else - $filename = str_replace( "$number$ext", ++$number . $ext, $filename ); - } + + $filename = wp_unique_filename( $upload['path'], $path_parts['basename'], $ext ); $new_file = $upload['path'] . "/$filename"; if ( ! wp_mkdir_p( dirname( $new_file ) ) ) {