Вот обещанный текст скрипта метагенератора
<?
### Builds a keyword list from a text, plus a short description.
### Input parameters:
### - $text - the text to extract keywords from
### - $keywords - extra keywords; they are placed in front of the generated ones.
### - $description - extra description; it is placed in front of the generated one.
### (Note: the description is at most 200 characters long, so the generated part
### is truncated to allow for the length of the extra description.)
###
### Return value:
### - $meta['keywords'] - the keywords
### - $meta['description'] - the description
###
### Usage:
### - $meta=create_meta($text);
### - $meta=create_meta($text, $keywords);
### - $meta=create_meta($text, 'extra,key,words');
### - $meta=create_meta($text, $keywords, $description);
### - $meta=create_meta($text, 'extra,key,words', 'extra description');
/**
 * Derives META keywords and a META description from a piece of text.
 *
 * The description is the caller's $description (if any) followed by the start
 * of the text, cut after the last sentence terminator (. ! ?) that fits into
 * 200 characters in total. When no terminator fits, the text is cut at the
 * last whitespace instead (or used whole if it is short enough).
 *
 * The keywords are the most frequent words of five or more characters
 * (top 15), two-word phrases (top 8) and three-word phrases (top 4), ordered
 * by frequency and prefixed with the caller's $keywords.
 *
 * If a file named common-words.txt exists in the current working directory,
 * the words it lists (whitespace separated, lines starting with # ignored)
 * are removed from the text before the keywords are counted.
 *
 * Valid UTF-8 input is processed per character; anything else is processed
 * per byte, in which case what counts as a "word character" depends on the
 * PCRE/locale tables.
 *
 * @param string $text        Source text; HTML tags are stripped.
 * @param string $keywords    Optional comma-separated keywords to put first.
 * @param string $description Optional text to put in front of the description.
 * @return array Array with the keys 'description' and 'keywords' (both strings).
 */
function create_meta($text, $keywords='', $description='') {
    $maxDescription = 200;
    $minWordLength = 5;
    // phrase length (in words) => how many of the most frequent phrases to keep
    $phraseLimits = array(1 => 15, 2 => 8, 3 => 4);

    // Normalise the text: drop tags, turn line breaks and tabs into spaces.
    $text = trim(stripslashes(preg_replace('/[\r\n\t]/', ' ', strip_tags((string) $text))));
    $description = trim((string) $description);
    if ($description !== '') {
        $description .= ' ';
    }

    // Description: whatever budget the caller's description leaves is filled
    // with the beginning of the text, ending on a sentence terminator.
    $mode = _create_meta_regex_mode($text . $description);
    $budget = $maxDescription - _create_meta_length($description, $mode);
    $excerpt = '';
    if ($budget > 0 && preg_match('/^.{1,' . $budget . '}/s' . $mode, $text, $found)) {
        $excerpt = $found[0];
        if (preg_match('/^.*[.!?]/s' . $mode, $excerpt, $found)) {
            // Greedy match: everything up to the last terminator in the window.
            $excerpt = $found[0];
        } elseif ($excerpt !== $text && preg_match('/^.*(?=\s)/s' . $mode, $excerpt, $found) && $found[0] !== '') {
            // No terminator and the text was cut: avoid ending in mid-word.
            $excerpt = $found[0];
        }
    }
    $meta = array();
    $meta['description'] = rtrim($description . $excerpt);

    // Remove common words. The alternation is grouped so that \b applies to
    // every listed word, not only to the first and the last one.
    $alternation = _create_meta_stop_word_alternation('common-words.txt');
    if ($alternation !== '') {
        $stopMode = ($mode === 'u' && _create_meta_regex_mode($alternation) === 'u') ? 'u' : '';
        $stripped = preg_replace('/\b(?:' . $alternation . ')\b/i' . $stopMode, '', $text);
        if ($stripped !== null) {
            $text = $stripped;
        }
    }

    // Split into lower-cased words of at least $minWordLength characters.
    $mode = _create_meta_regex_mode($text);
    $words = array();
    if (preg_match_all('/\w{' . $minWordLength . ',}/' . $mode, $text, $found)) {
        foreach ($found[0] as $word) {
            if ($mode === 'u' && function_exists('mb_strtolower')) {
                $words[] = mb_strtolower($word, 'UTF-8');
            } else {
                $words[] = strtolower($word);
            }
        }
    }

    // Collect the most frequent words and phrases, then order the combined
    // selection by frequency.
    $selected = array();
    foreach ($phraseLimits as $length => $limit) {
        foreach (_create_meta_top_phrases($words, $length, $limit) as $phrase => $count) {
            $selected[$phrase] = $count;
        }
    }
    arsort($selected);
    $generated = implode(',', array_keys($selected));

    // Caller-supplied keywords come first; never emit a dangling comma.
    $keywords = rtrim((string) $keywords, ", \t\r\n");
    if ($keywords !== '' && $generated !== '') {
        $meta['keywords'] = $keywords . ',' . $generated;
    } else {
        $meta['keywords'] = $keywords . $generated;
    }

    return $meta;
}

/**
 * Returns 'u' (PCRE UTF-8 mode) when $subject is valid UTF-8, '' otherwise.
 */
function _create_meta_regex_mode($subject) {
    return preg_match('//u', $subject) === 1 ? 'u' : '';
}

/**
 * Length of $subject in characters (UTF-8 mode) or in bytes (byte mode).
 */
function _create_meta_length($subject, $mode) {
    if ($mode === 'u') {
        return (int) preg_match_all('/./su', $subject, $ignored);
    }
    return strlen($subject);
}

/**
 * Reads the common-word file and returns its words as a regex alternation
 * ("a|the|because"), each word escaped; '' when nothing usable is found.
 */
function _create_meta_stop_word_alternation($path) {
    if (!is_file($path) || !is_readable($path)) {
        return '';
    }
    $lines = file($path);
    if ($lines === false) {
        return '';
    }
    $quoted = array();
    foreach ($lines as $line) {
        $line = trim($line);
        if ($line === '' || $line[0] === '#') {
            continue; // blank line or comment
        }
        foreach (preg_split('/\s+/', $line, -1, PREG_SPLIT_NO_EMPTY) as $word) {
            $quoted[] = preg_quote($word, '/');
        }
    }
    return implode('|', $quoted);
}

/**
 * Counts every run of $length consecutive words and returns the $limit most
 * frequent runs as phrase => count, most frequent first.
 */
function _create_meta_top_phrases($words, $length, $limit) {
    $counts = array();
    $lastStart = count($words) - $length;
    for ($i = 0; $i <= $lastStart; $i++) {
        $phrase = implode(' ', array_slice($words, $i, $length));
        $counts[$phrase] = isset($counts[$phrase]) ? $counts[$phrase] + 1 : 1;
    }
    arsort($counts);
    return array_slice($counts, 0, $limit, true);
}
?>
А вот файл common-words.txt
# Список общих слов для keyword генератора.
# Допускается одно общее слово на линию.
# Список может состоять как из английских, так и из русских слов. Влияет только на производительность.
a
the
because
также
нибудь
этого
некоторые
которой
который
наоборот
только
много
некто
нашей
нашего
тогда
после
всего
писали
Естественно, его можно сделать в виде TV-параметра и набивать в него эти слова. Я еще не переделывал его для МодХ; если кто хочет, может попробовать.
И еще. Вот в МодХ есть такой сниппет — недавно нашел. Он генерирует кейворды на основании текста страницы:
/****************************************************
* Name: PageKeywords
* Version 0.3 - just fixed a truncation error 18. Aug. 2006
* Desc: returns a list of words from the document's content
* created by Paul Paulousek [email protected]
* date Aug, 17. 2006
* based on tagcloud of Marc Hinse, [email protected], www.modxcms.de
* Version: 1.1.
*
* Usage: [[PageKeywords?min=`2`]]
* Parameters:
* min: Minimum occurrences of a word to be displayed (defaults to 1)
* if a chunk named "cloudwords" exists, the snippet will take the content of it for building the cloud
* the chunk must contain a comma-separated list of words and phrases.
* ***************************************************/
// PageKeywords: returns a comma-separated list of the words from an include
// list that occur at least $min times in the current document (its content
// field plus its template variables).
//
// $min and $modx are not defined here; they are expected to be supplied by the
// environment that runs this snippet (NOTE(review): presumably the MODx snippet
// parameters and API object - confirm against the MODx version in use).

// Minimum number of occurrences required for a word to be listed.
$min = isset($min) ? (int) $min : 1;

// Include list: taken from the "cloudwords" chunk (comma-separated), with
// surrounding whitespace and empty entries removed. An empty entry must never
// reach substr_count(), which rejects an empty needle.
$include = array();
foreach (explode(',', (string) $modx->getChunk('cloudwords')) as $candidate) {
    $candidate = trim($candidate);
    if ($candidate !== '') {
        $include[] = $candidate;
    }
}
if (!$include) {
    // No usable chunk: fall back to the sample list (put in your words here).
    $include = array('blog','method','edit','content','design','links','quickedit','MODx','customize');
}

// Gather the text to search: the document content first ...
$cid = $modx->documentIdentifier; //current document
$contents = $modx->getDocument($cid);
$content = '';
if (is_array($contents) && isset($contents['content'])) {
    $content = strip_tags((string) $contents['content']) . ' ';
}

// ... then every template variable except "content" itself.
$tvcontent = $modx->getTemplateVarOutput('*', $cid);
if (is_array($tvcontent)) {
    unset($tvcontent['content']);
    foreach ($tvcontent as $value) {
        if (is_scalar($value)) {
            $content .= strip_tags((string) $value) . ' ';
        }
    }
}

// Count case-insensitively; strtolower() only folds letters it knows for the
// active locale (ASCII on PHP 8), so other bytes are compared as they are.
$haystack = strtolower($content);
$words = array();
foreach ($include as $incword) {
    $wert = substr_count($haystack, strtolower($incword));
    if ($wert >= $min) {
        // the word reaches the required minimum: keep it with its count
        $words[$incword] = $wert;
    }
}

ksort($words); //sort them alphabetically (just comment that out, then they will be unsorted)

return implode(',', array_keys($words));