-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathhtml-to-wp-import.php
158 lines (129 loc) · 5.38 KB
/
html-to-wp-import.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
<?php
/**
* HTML to WordPress Page Importer
*
* This script imports HTML pages into WordPress, including text content and images
*/
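// Bootstrap WordPress so its API is available. The path is relative, so wp-load.php
// must be resolvable from the include path or this script's directory (typically the WordPress root).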
require_once('wp-load.php');
/**
 * Imports static HTML files from a directory as WordPress pages.
 *
 * For every *.html file the importer takes the <title> (falling back to the
 * file name), picks the first <main>, <article> or <body> element as the page
 * content, copies locally referenced images into the WordPress uploads
 * directory as attachments, rewrites the <img src> attributes to the
 * attachment URLs and finally creates the page or updates an existing page
 * carrying the same title.
 *
 * Input files are assumed to be UTF-8 encoded.
 */
class HTMLToWordPressImporter {
    /** @var string Directory scanned for *.html files (no trailing slash). */
    private $html_directory;

    /** @var string Directory searched for images referenced by the HTML (no trailing slash). */
    private $image_directory;

    /** @var array Map of source image path => attachment URL for images already imported by this instance. */
    private $imported_images = array();

    /**
     * @param string      $html_dir  Directory containing the HTML files.
     * @param string|null $image_dir Directory containing the images; defaults to "<html_dir>/images".
     */
    public function __construct($html_dir, $image_dir = null) {
        $this->html_directory = rtrim($html_dir, '/');
        // Build the default from the trimmed directory so a trailing slash on
        // $html_dir does not produce a "//images" path.
        $this->image_directory = $image_dir
            ? rtrim($image_dir, '/')
            : $this->html_directory . '/images';
    }

    /**
     * Imports every *.html file found directly inside the HTML directory.
     *
     * @return void
     */
    public function import() {
        $html_files = glob($this->html_directory . '/*.html');
        // glob() returns false on error; iterating over it would raise a warning.
        if ($html_files === false) {
            error_log("Failed to list HTML files in: " . $this->html_directory);
            return;
        }
        foreach ($html_files as $html_file) {
            $this->process_html_file($html_file);
        }
    }

    /**
     * Converts a single HTML file into a WordPress page (create or update).
     *
     * @param string $file_path Path of the HTML file.
     * @return void
     */
    private function process_html_file($file_path) {
        $html_content = file_get_contents($file_path);
        if ($html_content === false) {
            error_log("Failed to read file: " . $file_path);
            return;
        }
        if (trim($html_content) === '') {
            // DOMDocument::loadHTML() rejects empty input, and there is nothing to import.
            error_log("Skipping empty file: " . $file_path);
            return;
        }

        $doc = $this->load_dom($html_content);

        // Title: <title> element if present, otherwise the file name.
        $fallback_title = basename($file_path, '.html');
        $title = $fallback_title;
        $title_nodes = $doc->getElementsByTagName('title');
        if ($title_nodes->length > 0) {
            $title = $title_nodes->item(0)->nodeValue;
        }
        // Normalise once so the lookup below and the stored title are the same string.
        $title = wp_strip_all_tags($title);
        if ($title === '') {
            $title = $fallback_title;
        }

        // Rewrite image sources on the DOM nodes before serialising, so only
        // real src attributes are touched and the markup is parsed just once.
        $container = $this->find_content_container($doc);
        $this->process_images($container !== null ? $container : $doc);
        $content = $container !== null ? $doc->saveHTML($container) : $doc->saveHTML();

        $page_data = array(
            'post_title'   => $title,
            'post_content' => $content,
            'post_status'  => 'publish',
            'post_type'    => 'page',
        );

        $existing_id = $this->find_existing_page_id($title);
        if ($existing_id) {
            $page_data['ID'] = $existing_id;
            // wp_slash(): the post functions unslash their input, which would
            // otherwise strip backslashes from the imported content.
            $result = wp_update_post(wp_slash($page_data), true);
            $message = "Updated page: ";
        } else {
            $result = wp_insert_post(wp_slash($page_data), true);
            $message = "Created new page: ";
        }

        if (is_wp_error($result) || !$result) {
            $reason = is_wp_error($result) ? $result->get_error_message() : 'unknown error';
            error_log("Failed to save page '" . $title . "': " . $reason);
            return;
        }
        echo $message . $title . "\n";
    }

    /**
     * Parses an HTML string (assumed UTF-8) into a DOMDocument.
     *
     * Non-ASCII characters are converted to numeric entities so libxml does not
     * misread them; parser warnings about sloppy markup are collected and
     * discarded instead of being silenced with the @ operator.
     *
     * @param string $html Non-empty HTML markup.
     * @return DOMDocument
     */
    private function load_dom($html) {
        $doc = new DOMDocument();
        $previous = libxml_use_internal_errors(true);
        $doc->loadHTML(mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8'));
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        return $doc;
    }

    /**
     * Returns the element holding the page content: the first <main>, else the
     * first <article>, else the first <body>.
     *
     * @param DOMDocument $doc Parsed document.
     * @return DOMElement|null Null when none of the elements exists.
     */
    private function find_content_container(DOMDocument $doc) {
        foreach (array('main', 'article', 'body') as $tag) {
            $nodes = $doc->getElementsByTagName($tag);
            if ($nodes->length > 0) {
                return $nodes->item(0);
            }
        }
        return null;
    }

    /**
     * Looks up an existing page by exact title.
     *
     * Uses WP_Query instead of get_page_by_title(), which WordPress has deprecated.
     *
     * @param string $title Page title to search for.
     * @return int Page ID, or 0 when no page matches.
     */
    private function find_existing_page_id($title) {
        $query = new WP_Query(array(
            'post_type'              => 'page',
            'title'                  => $title,
            'post_status'            => 'any',
            'posts_per_page'         => 1,
            'orderby'                => 'ID',
            'order'                  => 'ASC',
            'fields'                 => 'ids',
            'no_found_rows'          => true,
            'ignore_sticky_posts'    => true,
            'update_post_meta_cache' => false,
            'update_post_term_cache' => false,
        ));
        return empty($query->posts) ? 0 : (int) $query->posts[0];
    }

    /**
     * Imports locally referenced images below $scope into the media library and
     * points each <img src> at the resulting attachment URL.
     *
     * Images whose file cannot be found or imported keep their original src.
     *
     * @param DOMDocument|DOMElement $scope Node whose descendant <img> elements are processed.
     * @return void
     */
    private function process_images($scope) {
        $images = $scope->getElementsByTagName('img');
        if ($images->length === 0) {
            return;
        }

        $upload_dir = wp_upload_dir();
        if (!empty($upload_dir['error'])) {
            error_log("Upload directory unavailable: " . $upload_dir['error']);
            return;
        }
        if (!is_dir($upload_dir['path']) && !wp_mkdir_p($upload_dir['path'])) {
            error_log("Failed to create upload directory: " . $upload_dir['path']);
            return;
        }

        // Provides wp_generate_attachment_metadata(); loaded once, not per image.
        require_once(ABSPATH . 'wp-admin/includes/image.php');

        foreach ($images as $image) {
            $src = trim($image->getAttribute('src'));
            // Only relative references are imported: skip empty values, absolute
            // URLs, protocol-relative URLs and inline data URIs.
            if (
                $src === ''
                || filter_var($src, FILTER_VALIDATE_URL)
                || strpos($src, '//') === 0
                || stripos($src, 'data:') === 0
            ) {
                continue;
            }

            $img_path = $this->resolve_local_image($src);
            if ($img_path === null) {
                continue;
            }

            $new_src = $this->import_image($img_path, $upload_dir['path']);
            if ($new_src) {
                $image->setAttribute('src', $new_src);
            }
        }
    }

    /**
     * Maps an <img src> value to a file inside the image directory.
     *
     * Only the base name of the URL path is used, so directory components,
     * query strings and fragments in the src are ignored. Both the literal and
     * the percent-decoded file name are tried.
     *
     * @param string $src Relative image reference taken from the HTML.
     * @return string|null Path of the existing file, or null when not found.
     */
    private function resolve_local_image($src) {
        $url_path = parse_url($src, PHP_URL_PATH);
        if (!is_string($url_path) || $url_path === '') {
            return null;
        }

        $candidates = array_unique(array(
            basename($url_path),
            basename(rawurldecode($url_path)),
        ));
        foreach ($candidates as $file_name) {
            if ($file_name === '') {
                continue;
            }
            $candidate_path = $this->image_directory . '/' . $file_name;
            if (is_file($candidate_path)) {
                return $candidate_path;
            }
        }
        return null;
    }

    /**
     * Copies an image into the uploads directory and registers it as an attachment.
     *
     * The copy matters: an attachment whose file lives outside the uploads
     * directory does not get a usable URL from wp_get_attachment_url().
     * Results are cached per source path so an image referenced several times
     * during one run is imported only once.
     *
     * @param string $img_path    Path of the source image.
     * @param string $upload_path Uploads directory the image is copied into.
     * @return string|null Attachment URL, or null when the import failed.
     */
    private function import_image($img_path, $upload_path) {
        if (isset($this->imported_images[$img_path])) {
            return $this->imported_images[$img_path];
        }

        $wp_filetype = wp_check_filetype(basename($img_path), null);
        if (empty($wp_filetype['type']) || strpos($wp_filetype['type'], 'image/') !== 0) {
            error_log("Skipping unsupported image type: " . $img_path);
            return null;
        }

        // Pick a name that does not clash with a file already in the uploads directory.
        $target_name = wp_unique_filename($upload_path, basename($img_path));
        $target_path = rtrim($upload_path, '/') . '/' . $target_name;
        if (!copy($img_path, $target_path)) {
            error_log("Failed to copy image to uploads: " . $img_path);
            return null;
        }

        $attachment = array(
            'post_mime_type' => $wp_filetype['type'],
            'post_title'     => preg_replace('/\.[^.]+$/', '', basename($img_path)),
            'post_content'   => '',
            'post_status'    => 'inherit',
        );

        $attach_id = wp_insert_attachment($attachment, $target_path);
        if (!$attach_id || is_wp_error($attach_id)) {
            // Do not leave an orphaned copy behind.
            wp_delete_file($target_path);
            error_log("Failed to create attachment for: " . $img_path);
            return null;
        }

        $attach_data = wp_generate_attachment_metadata($attach_id, $target_path);
        wp_update_attachment_metadata($attach_id, $attach_data);

        $url = wp_get_attachment_url($attach_id);
        if (!$url) {
            return null;
        }

        $this->imported_images[$img_path] = $url;
        return $url;
    }
}
// Usage example (uncomment and modify paths as needed):
/*
$importer = new HTMLToWordPressImporter(
'/path/to/html/files', // Directory containing HTML files
'/path/to/images' // Directory containing images (optional)
);
$importer->import();
*/