HEX
Server: nginx/1.18.0
System: Linux test-ipsremont 5.4.0-214-generic #234-Ubuntu SMP Fri Mar 14 23:50:27 UTC 2025 x86_64
User: ips (1000)
PHP: 8.0.30
Disabled: pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,pcntl_unshare,
Upload Files
File: /var/www/elite/sitemap-checker.php
<?php
// Parse "key=value" command-line arguments into $options.
// The script name in $argv[0] is dropped first so it is not treated as an option.
unset($argv[0]);
$options = array();
if (isset($argv) && is_array($argv)) {
    foreach ($argv as $arg) {
        // Split on the first '=' only, so values may themselves contain '='.
        $argData = explode('=', $arg, 2);
        // A bare flag without '=' gets an empty value instead of reading an undefined index.
        $options[$argData[0]] = isset($argData[1]) ? $argData[1] : '';
    }
}

// 1-based position in the sitemap to start checking from; always an integer >= 1.
$start = isset($options['start']) ? max(1, (int) $options['start']) : 1;
$siteUrl = 'https://3000077.ru';
$clearSiteUrl = '3000077.ru';

echo 'Читаем файл sitemap: ' . $siteUrl . '/sitemap.xml' . PHP_EOL;
$xmlFile = simplexml_load_file($siteUrl . '/sitemap.xml');
if ($xmlFile === false) {
    // Without a parsed sitemap there is nothing to iterate; fail loudly instead of reporting success.
    echo ' - error! sitemap could not be loaded or parsed' . PHP_EOL;
    exit(1);
}

// Number of child entries in the sitemap; invariant for the whole loop.
$total = count($xmlFile);
$num = 0;
foreach ($xmlFile as $row) {
    $num++;
    // Skip entries located before the requested start position.
    if ($num < $start) continue;
    $url = trim((string) $row->loc);

    echo 'Checking ' . $num . '/' . $total . ': ' . $url . PHP_EOL;

    // getHeaderUrl() returns [curl info array, curl error string].
    $pageHeaders = getHeaderUrl($url);
    if ($pageHeaders[0]['http_code'] !== 200) {
        $curlError = isset($pageHeaders[1]) ? (string) $pageHeaders[1] : '';
        echo ' - error!' . $pageHeaders[0]['http_code']
            . ($curlError !== '' ? ' (' . $curlError . ')' : '') . PHP_EOL;
        // Stop at the first failing URL; a non-zero status lets calling shells detect the failure.
        exit(1);
    }

    echo ' - success' . PHP_EOL;
    echo PHP_EOL;
}

/**
 * Formats a byte count as a human-readable string such as "1.5 kb".
 *
 * The value is divided by 1024 until it drops below 1024 or the largest
 * known unit ("pb") is reached, then rounded to two decimals.
 *
 * @param int|float $size Size in bytes; zero, fractional and negative values are accepted.
 * @return string Rounded value, a space, and one of b/kb/mb/gb/tb/pb.
 */
function convert($size)
{
    $units = array('b', 'kb', 'mb', 'gb', 'tb', 'pb');
    $lastIndex = count($units) - 1;

    $value = (float) $size;
    $index = 0;
    // Repeated division avoids log(): log(0) leads to a division by zero, and
    // floating-point rounding of log() can select the wrong unit at exact powers of 1024.
    while (abs($value) >= 1024 && $index < $lastIndex) {
        $value /= 1024;
        $index++;
    }

    return round($value, 2) . ' ' . $units[$index];
}

/**
 * Requests a URL with cURL (HTTP GET, IPv4 only) and returns the full response.
 *
 * CURLOPT_HEADER is enabled, so the response headers precede the body in the
 * returned content string.
 *
 * @param string $url            Address to request.
 * @param bool   $followLocation Whether cURL follows "Location:" redirects.
 * @return array Three elements: the curl_getinfo() array, the response
 *               (string, or false when the transfer failed), and the
 *               curl_error() text (empty string when there was no error).
 */
function getUrl($url, $followLocation = true) {
    $ch = curl_init($url);
    if ($ch === false) {
        // Keep the return shape so callers can still read 'http_code' and the error text.
        return [['http_code' => 0], false, 'curl_init() failed'];
    }

    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER       => true,
        CURLOPT_FOLLOWLOCATION       => $followLocation,
        CURLOPT_HEADER               => true,
        CURLOPT_HTTPGET              => 1,
        CURLOPT_DNS_USE_GLOBAL_CACHE => false,
        CURLOPT_DNS_CACHE_TIMEOUT    => 2,
        CURLOPT_IPRESOLVE            => CURL_IPRESOLVE_V4,
    ]);

    // Run the transfer exactly once; a second curl_exec() would repeat the whole request.
    $content = curl_exec($ch);
    $info = curl_getinfo($ch);
    $error = curl_error($ch);
    curl_close($ch);

    return [$info, $content, $error];
}

/**
 * Issues an HTTP GET for the given URL and reports only the transfer metadata.
 *
 * The response body is captured (CURLOPT_RETURNTRANSFER) rather than printed,
 * and is then discarded; response headers are not added to it.
 *
 * @param string $url            Address to request.
 * @param bool   $followLocation Whether cURL follows "Location:" redirects.
 * @return array Two elements: the curl_getinfo() array and the curl_error() text.
 */
function getHeaderUrl($url, $followLocation = false) {
    $handle = curl_init($url);

    curl_setopt_array($handle, [
        CURLOPT_RETURNTRANSFER       => true,
        CURLOPT_FOLLOWLOCATION       => $followLocation,
        CURLOPT_HEADER               => false,
        CURLOPT_HTTPGET              => 1,
        CURLOPT_DNS_USE_GLOBAL_CACHE => false,
        CURLOPT_DNS_CACHE_TIMEOUT    => 2,
        CURLOPT_IPRESOLVE            => CURL_IPRESOLVE_V4,
    ]);

    // The return value (the body) is intentionally ignored; only the metadata matters here.
    curl_exec($handle);

    $result = [curl_getinfo($handle), curl_error($handle)];
    curl_close($handle);

    return $result;
}

/**
 * Downloads a page and collects the same-site links found in its <a href="..."> attributes.
 *
 * Prints the HTTP status code and the number of anchors found. Absolute https
 * links are kept only when their origin equals $siteUrl or
 * 'https://' . $clearSiteUrl (both read from globals); links with any other
 * scheme are ignored; relative links are resolved against $thislink.
 *
 * @param string $thislink Absolute URL of the page to scan.
 * @return array|int|null List of absolute internal URLs; the integer 404 when
 *                        the page answers 404; null when cURL reports an error.
 */
function get_pageInfo($thislink){
    global $siteUrl, $clearSiteUrl;

    list($curlInfo, $content, $error) = getUrl($thislink);
    if (!empty($error)) {
        echo $error . ' - retry' . PHP_EOL. PHP_EOL;
        return;
    }
    if ($curlInfo['http_code'] == 404) {
        echo ' – 404 found' . PHP_EOL;
        return 404;
    }

    // getUrl() may hand back false for the body; the regex functions need a string.
    $content = (string) $content;

    // Anchors with double-quoted and single-quoted href values are matched separately.
    preg_match_all('/<a\b[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/i', $content, $links1);
    preg_match_all('/<a\b[^>]*href=\'([^\']*)\'[^>]*>([\s\S]*?)<\/a>/i', $content, $links2);

    // Merge both href lists into one array
    $hrefs = array_merge($links1[1], $links2[1]);

    echo $curlInfo['http_code'] . ' ' . count($hrefs) . PHP_EOL;

    $sublinks = [];

    // Keep internal links only and normalise them to absolute URLs
    foreach ($hrefs as $link) {
        // Protocol-relative links inherit the https scheme the site is served with.
        if (strpos($link, '//') === 0) {
            $link = 'https:' . $link;
        }

        // Absolute https link: the match is anchored so that an URL embedded in a
        // query string (e.g. /go?url=https://...) is not mistaken for the link's origin.
        if (preg_match('/^https:\/\/[^\/?#]*/i', $link, $match)) {
            $isInternal = strcasecmp($match[0], $siteUrl) === 0
                || strcasecmp($match[0], 'https://' . $clearSiteUrl) === 0;
            if ($isInternal) {
                $sublinks[] = $link;
            }
            continue;
        }

        // Any other explicit scheme (http:, javascript:, mailto:, tel:, skype:, ...)
        // is skipped. Checking the prefix only keeps ordinary paths that merely
        // contain such a word, e.g. "/blog/javascript-tips".
        if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $link)) {
            continue;
        }

        if (strpos($link, '/') === 0) {
            // Root-relative path
            $sublinks[] = $siteUrl . $link;
        } elseif ($link === '' || $link[0] === '#' || $link[0] === '?') {
            // Empty, fragment-only and query-only references point at the current page.
            $sublinks[] = $thislink . $link;
        } else {
            // Document-relative path, possibly starting with "./" or "../"
            $sublinks[] = get_pageInfo_resolveRelative($thislink, $link, $siteUrl);
        }
    }

    return $sublinks;
}

/**
 * Helper for get_pageInfo(): resolves a document-relative link against the directory of $baseUrl.
 *
 * Leading "./" segments are dropped and each leading "../" removes one
 * directory level, never going above the host.
 *
 * @param string $baseUrl Absolute URL of the page that contains the link.
 * @param string $link    Relative reference without scheme or leading slash.
 * @param string $rootUrl Fallback "scheme://host" used when $baseUrl is not of that form.
 * @return string Absolute URL.
 */
function get_pageInfo_resolveRelative($baseUrl, $link, $rootUrl)
{
    // Query string and fragment are not part of the base path.
    $basePath = preg_replace('/[?#].*$/s', '', $baseUrl);
    $segments = explode('/', (string) $basePath);

    // For "scheme://host/dir/doc" the parts are ['scheme:', '', 'host', 'dir', 'doc'];
    // the first three always stay.
    if (count($segments) < 3 || $segments[1] !== '') {
        $segments = explode('/', $rootUrl);
    } elseif (count($segments) > 3) {
        // Drop the document name (or the empty part after a trailing slash).
        array_pop($segments);
    }

    while (true) {
        if (strpos($link, '../') === 0) {
            $link = substr($link, 3);
            if (count($segments) > 3) {
                array_pop($segments);
            }
        } elseif (strpos($link, './') === 0) {
            $link = substr($link, 2);
        } else {
            break;
        }
    }

    return implode('/', $segments) . '/' . $link;
}

// Final status line; not reached when the checking loop terminates the script via exit.
echo 'Done', PHP_EOL;