1,"Feb"=>2,"Mar"=>3,"Apr"=>4,"May"=>5,"Jun"=>6,"Jul"=>7,"Aug"=>8,"Sep"=>9,"Oct"=>10,"Nov"=>11,"Dec"=>12);

    //WORK VARIABLES
    //overview record currently being processed (filled in by XOVER2ARRAY())
    var $xover;
    //var $group;
    var $groupid;

    //collection part search list
    var $psL=array();
    var $psL_cnt;

    //file extension search list
    var $sL=array();
    var $sL_cnt; //holds array member count for $sL

    //cache for header and nfo items
    var $headcache=array();
    var $nfocache=array();

    //cache updates to existing files and collections
    var $fileupdatecache=array();
    var $collupdatecache=array();
    //var $matchtofilecache=array();//filestem,sender,fileid,collid
    var $collupdatenewfilecache=array();

    //var is deprecated (but supported) in PHP5
    //better to use private
    //private $filescache = array();
    var $filescache = array();
    var $filecachequeue = array();
    var $collectionscache=array();
    var $collectionscachequeue=array();
    //var $stat = 0;

    /**
     * Loads the tunable limits from $GLOBALS['CONF_parsexover'] and builds
     * the two pattern lists used by collectionbuilder():
     *   $this->sL  - file-extension patterns, tried in order (first hit wins)
     *   $this->psL - patterns that strip part/size/count info from a subject
     *
     * NOTE(review): this looks like a PHP4-style constructor (method named
     * after the class) - the class header is not visible here, confirm.
     */
    function parseXOVER(){
        //set the default configuration values from the globals config array...
        $conf=$GLOBALS['CONF_parsexover'];
        $this->minstemlen=$conf['CONF_minstemlength'];
        $this->maxnfosize=$conf['CONF_maxnfobytes'];
        $this->minsingle_yenc_size=$conf['CONF_minsingleyencbytes'];
        $this->maxREsize=$conf['CONF_maxREbytes'];
        $this->saveXREF=$conf['CONF_saveXREF'];

        //the list of search patterns
        //these are used to find file type, and remove type specific info
        //to make the matching stem.
        //ORDER MATTERS: the specific types must come before SPLIT and OTHER.
        /***RAR***/
        $this->sL[] = array('uniPat'=>'(?:\.part[^\.]*\.rar|\.rar|\.r\d{2,3})','type'=>'rar',);
        /***PAR2***/
        $this->sL[] = array('uniPat'=>'(?:\.vol[^\.]*)?\.par2','type'=>'par2',);
        /***PAR***/
        $this->sL[] = array('uniPat'=>'(?:\.vol[^\.]*)?\.par','type'=>'par',);
        /***NZB***/
        $this->sL[] = array('uniPat'=>'\.nzb', 'type' =>'nzb',);
        /***SFV***/
        $this->sL[] = array('uniPat'=>'\.sfv', 'type' =>'sfv',);
        /***SPLIT***/
        // a dot, optional "part", 2-4 digits, optionally followed by a 2-3 character extension
        $this->sL[] = array('uniPat'=>'\.(?:part)?\d{2,4}(?:\.\w{2,3})?', 'type' =>"SPLIT",);
        /***NFO***/
        $this->sL[] = array('uniPat'=>'\.nfo', 'type' =>'nfo',);
        /***OTHER***/
        // a dot followed by exactly three word characters
        $this->sL[] = array('uniPat'=>'\.\w{3}', 'type' =>'OTHER',);
        $this->sL_cnt=count($this->sL);

        //these are the collection info removal search strings
        //First removes part info: N-N,N/N,NofN
        //second removes size info: N.N mb/kb/b/bytes/kbytes/mbytes
        //   (previous version was "/\s+\d+(?:\.\d+)?\s?(k|m)?b(ytes)?/i")
        //third removes collection part counts: [NNN] (up to three digits (simplified by:bibi-pov)
        $this->psL=array(
            "/\d+\s?(?:-|\/|of|de)\s?\d+/iS",
            "/\d+(?:\.\d+)?\s?(?:k|m)?(?:b(?:ytes|ite)?|o(?:ctet)?)/i",
            "|\[\d{1,3}\]|S"
        );
        $this->psL_cnt=count($this->psL);
    }

    /**
     * Runs yencfiles() on the current overview record and stores the result
     * in $this->xover['fileinfo'].
     *
     * @return bool true when file information was found, false otherwise
     */
    function findfile(){
        $this->xover['fileinfo']=$this->yencfiles($this->xover['subject'],$this->xover['bytes']);
        //a truthy fileinfo means there was valid file info
        return (bool)$this->xover['fileinfo'];
    }

    /**
     * Builds the collection info for the current record from the subject stem
     * found by findfile() and stores it in $this->xover['collectioninfo'].
     */
    function findcollection(){
        $this->xover['collectioninfo']=$this->collectionbuilder($this->xover['fileinfo']['subjectstem']);
    }

    /**
     * Splits one TAB separated overview line into $this->xover.
     *
     * Fields used (by position): 0 id, 1 subject, 2 sender, 3 rawdate,
     * 4 message-id (stored without the surrounding <>), 6 bytes.
     * Positions 5 (references) and 7 (lines) are ignored.
     *
     * When $this->saveXREF is enabled, fields from position 8 on are searched
     * for an "Xref" header and up to two cross-posted group names are stored
     * as 'ag1' and 'ag2'.
     *
     * FIXED: the group comparison used an undefined local $group and the
     * "is ag1 already set" test looked at the raw input string instead of the
     * array being built, so the second group always overwrote 'ag1' and 'ag2'
     * was never filled. Missing Xref entries were also read unchecked.
     *
     * @param string      $xover one raw overview line
     * @param string|null $group optional name of the group being scanned; when
     *                           given, that group is left out of ag1/ag2
     */
    function XOVER2ARRAY($xover,$group=null){
        $ov=explode($this->TAB,$xover);

        $xo=array();
        //$xo['groupid']=$this->groupid;
        $xo['id']=$ov[0];
        $xo['subject']=$ov[1];
        $xo['sender']=$ov[2];
        $xo['rawdate']=$ov[3];
        //strip <> from message ID
        $xo['messageid']=trim($ov[4]," <>");
        //$xo['ref']=$ov[5];
        $xo['bytes']=$ov[6];
        //$xo['lines']=$ov[7];

        //only do this if saveXREF is enabled
        if($this->saveXREF){
            //check only the remaining array members for the xref
            $arr_len=count($ov);
            for($i=8;$i<$arr_len;$i++){
                if(substr($ov[$i],0,4)!='Xref'){
                    continue;
                }
                //take it apart: "Xref: <server> <group>:<n> <group>:<n> ..."
                $xref=explode(" ",$ov[$i]);
                //entries 2 and 3 hold the first two group:article pairs
                for($j=2;$j<=3;$j++){
                    if(!isset($xref[$j])){
                        break;
                    }
                    $pair=explode(':',$xref[$j]);
                    $name=$pair[0];
                    if($name===''||$name===$group){
                        continue;
                    }
                    if(empty($xo['ag1'])){
                        $xo['ag1']=$name;
                    }else{
                        $xo['ag2']=$name;
                    }
                }
            }//for
        }
        $this->xover=$xo;
    }
/********************************************
     *
     * This is the function that finds encoded files.
     * The best way to do this is to find the part numbers.
     * yEnc messages will have a (#/#) part number marking.
     * It is (almost) always the last such marking in the subject.
     * We can get 99.99% of files using this header.
     *
     * STEM: this is the file name we use to match multiple parts.
     * The stem is just the header with the (#/#) removed
     * (all parts of a file will look identical).
     * We save this and use it to match headers.
     *
     * I once saw a poster's .par2 file not show up because they didn't use (1/1) for single parts.
     * So, I added the check below that looks for 'yenc' if (\d/\d) is not located.
     *
     * Returns false for rejected subjects, otherwise an array with the keys
     * 'part', 'totalparts' and 'subjectstem'.
     *
     ************************************************/
    function yencfiles($subject,$bytes){
        //get rid of replies before doing anything else!
        //do a size check!
        //this should get rid of all the re: messages
        if( ($bytes<$this->maxREsize)&&(stristr(substr($subject,0,3),'RE:')) ){
            //with RE: and small size, probably a reply!
            return false;
        }

        $fileinfo=array();

        //are there parts?
        preg_match_all("|\((\d*)[\/](\d*)\)|U",$subject, $out, PREG_PATTERN_ORDER);

        //if multiple (#/#) are found, we use the last one (almost always correct!)
        //this replaces a loop that did far more work than needed.
        $pc=count($out[0]);
        if($pc>0){
            $pc--;
            if(!strcmp($out[1][$pc],'0')){return false;}//we ignore part 0/##
            $fileinfo['part']=$out[1][$pc];
            $fileinfo['totalparts']=$out[2][$pc];

            //build the matching subject stem
            //this is the subject without the (##/##)
            //will be used to match other parts
            //remove everything AFTER the (/), this will catch the annoying files with (/) ###kB|### MBytes|etc!
            //we do this with a simple split on the (part/total) marker and keep what precedes it
            $fa=explode('('.$fileinfo['part'].'/'.$fileinfo['totalparts'].')',$subject);
            $fileinfo['subjectstem']=$fa[0];
            return $fileinfo;
        }

        /****************************************
        //for some files that are (1/1) and dont give a part -
        //does it contain yEnc?
        //how big is it? greater than XXK
        *******************************************/
        if( ($bytes>$this->minsingle_yenc_size)&&stristr($subject,'yenc') ){
            $fileinfo['subjectstem']=$subject;
            //just return as 1 of 1
            $fileinfo['part']=1;
            $fileinfo['totalparts']=1;
            return $fileinfo;
        }
        return false;
    }

    //a function to store rejected posts to look at later
    function logreject(&$sql){
        $sql->logreject($this->xover['subject']);
    }

    /***********************************************************************************
     *
     * This function looks for a file name, extension, and creates a collection stem.
     *
     * The stem is a generic version of the header with all unique info removed:
     *   part #
     *   file extensions
     *   whole file names (this can be too aggressive, so we do a length check)
     *
     * The resulting 'stem' can be used to match all the files to a collection.
     *
     * Returns an array with the keys 'stem' and 'filetype'; 'filetype' is one of
     * the 'type' values from $this->sL, or 'OTHER' when nothing matched.
     *
     *************************************************************************************/
    function collectionbuilder($subjectstem){
        /**************************************
         *
         * strip everything unique out of the subject and save the stem
         *
         *******************************************/
        $collectioninfo=array();
        $collectionstem=$subjectstem;

        //use the part search patterns to find/remove the part info
        for($i=0;$i<$this->psL_cnt;$i++){
            $collectionstem=preg_replace($this->psL[$i],'',$collectionstem);//add a break???
        }

        //1. look for a file name
        //2. look for extensions
        //look for known extensions
        $strlen_collectionstem=strlen($collectionstem);
        for($i=0;$i<$this->sL_cnt;$i++){
            //better collection matching - pull out any file name in the "" to get more generic name
            ////first check for file inside "" - "|\".*\"|U"
            //the new version can be too aggressive -
            // we need to check that the stem is > 40? chars, else, just strip extension.
            //also, sometimes there are spaces in a file name (if it is in quotes, esp),
            //we should check first for quotes and clean differently if its in quotes (to the quote?)
            // $cs=preg_replace("|[\s\"-^]\S*".$this->sL[$i]['uniPat']."[\s\"$]|Ui",'',$collectionstem);
            //NOTE(review): inside the character classes ^ and $ are literal characters,
            //not anchors, and "-\( is a range - left as is, confirm this is intended.
            $cs=preg_replace("/[\s\[\"-\(^]\S*".$this->sL[$i]['uniPat']."[\s\]\"\)$]/Ui",'',$collectionstem);
            if(strlen($cs)>=$strlen_collectionstem){
                continue;//nothing was removed, try the next type
            }

            $collectioninfo['filetype']=$this->sL[$i]['type'];
            if(strlen($cs)<=$this->minstemlen){//how many characters before we worry?
                //just try to extract the extension,
                //there isn't enough stem left if we kill the file name
                $cs=preg_replace("/".$this->sL[$i]['uniPat']."[\s\]\"\)$]/Ui",'',$collectionstem);
            }
            $collectioninfo['stem']=$cs;
            return $collectioninfo;
        }

        //No type or extension identified, what do we do?
        //---------------------
        //try loose search
        for($i=0;$i<$this->sL_cnt;$i++){
            $cs=preg_replace("/".$this->sL[$i]['uniPat']."/Ui",'',$collectionstem);
            if(strlen($cs)<$strlen_collectionstem){
                $collectioninfo['filetype']=$this->sL[$i]['type'];
                $collectioninfo['stem']=$cs;
                return $collectioninfo;
            }
        }

        //do something - ????
        //just use other and hope for the best.
        $collectioninfo['stem']=$collectionstem;
        $collectioninfo['filetype']='OTHER';
        return $collectioninfo;
    }

    /**
     * Stores the current overview record ($this->xover).
     *
     * A record that matches a known file only queues size/part updates and the
     * header. Otherwise the record starts a new file, inside a matching
     * collection or a newly created one, and may be queued for NFO fetching.
     *
     * $sql is the database wrapper passed on to the cache helpers.
     */
    function savetodb(&$sql){
        $fi=$this->matchfilecache($sql);
        if($fi){
            $this->xover['fileinfo']['fileid']=$fi['key'];
            $this->xover['collectioninfo']['id']=$fi['collid'];
            //we have a match, just update size and available parts
            //cache these, no need for instant update
            $this->cacheupdate($this->xover['fileinfo']['fileid'],
                               $this->xover['collectioninfo']['id'],
                               $this->xover['bytes']);
            //add (reduced) header to header DB
            //cache the header to reduce hits to the database
            $this->cacheheader($this->xover);
            //we dont do multipart NFO files, so this will never be an NFO, return
            return;
        }

        //no match! need to update collection file type count
        //check for collection match to stem
        $ci=$this->matchcollectioncache($sql);
        if($ci){
            $this->xover['collectioninfo']['id']=$ci['cid'];
            //update collection, increment file type counter, parts total parts
            $this->cacheupdatecollectionnewfile($this->xover['fileinfo']['totalparts'],
                                                $this->xover['bytes'],
                                                $this->xover['collectioninfo']['filetype'],
                                                $this->xover['collectioninfo']['id']);
        }else{//no collection match
            //make a new collection, get the ID
            $this->xover['collectioninfo']['id']=$this->addcollectioncache($sql);
        }

        //create a new file record, get the ID
        $this->xover['fileinfo']['fileid']=$this->addfilecache($sql);

        //add (reduced) header to header DB
        //cache the header to reduce hits to the database
        $this->cacheheader($this->xover);

        //for the NFO builder
        //$ci is false for a brand new collection, so use empty() rather than
        //reading an offset of a non-array
        if(($GLOBALS['CONF_nfo']['getnfo'])&&
           ($this->xover['collectioninfo']['filetype']=='nfo')&&
           empty($ci['nfo'])&&
           ($this->xover['bytes']<$this->maxnfosize)
        ){
            //add NFO to the cache
            $this->nfocache[$this->xover['collectioninfo']['id']]=$this->xover['messageid'];
        }
    }

    /*-----------------------------------------------------------------------------*/
    // Cache and Cache Control Functions
    //------------------------------------------------------------------------------//

    /**
     * Hands every non-empty write cache to the database wrapper and empties it.
     */
    function flushcache(&$sql){
        //need to test the cache for data
        if($this->headcache){
            //save all the headers
            $sql->addheaderfromcache($this->headcache);
            //flush the cache, free up memory
            $this->headcache=array();
        }
        if($this->fileupdatecache){
            $sql->updatefilefromcache($this->fileupdatecache);
            $this->fileupdatecache=array();
        }
        if($this->collupdatecache){
            $sql->updatecollfromcache($this->collupdatecache);
            $this->collupdatecache=array();
        }
        if($this->collupdatenewfilecache){
            $sql->collupdatenewfilefromcache($this->collupdatenewfilecache);
            $this->collupdatenewfilecache=array();
        }
        if($this->nfocache){
            //save to nfo fetch table
            $sql->addnfofromcache($this->nfocache);
            $this->nfocache=array();
        }
    }

    //drops the file and collection lookup caches together with their eviction queues
    function clearfilescache(){
        $this->filescache=array();
        $this->filecachequeue = array();
        $this->collectionscache=array();
        $this->collectionscachequeue=array();
    }

    /**
     * Looks up the file the current record belongs to, first in the in-memory
     * cache and then through $sql->matchtofile(). Database hits are cached;
     * once the queue grows past $this->CACHESIZE the oldest entry is evicted.
     *
     * Returns whatever matchtofile() returned (falsy when there is no match).
     */
    function matchfilecache(&$sql){
        //match to file caching - added by bibi-pov
        $cachekey = $this->xover['fileinfo']['subjectstem'].strlen($this->xover['fileinfo']['subjectstem']).'|'.
                    $this->xover['sender'].strlen($this->xover['sender']);
        if(array_key_exists($cachekey, $this->filescache)){
            return $this->filescache[$cachekey];
        }

        if (!array_key_exists('date', $this->xover)) {
            // Transform the date as a timestamp only if doesn't exists in the array already
            $this->xover['date'] = $this->getTimestamp($this->xover['rawdate']);
        }
        $fileinfo=$sql->matchtofile($this->groupid,$this->xover);
        if($fileinfo){
            //if (!array_key_exists($cachekey, $this->filescache))
            $this->filecachequeue[] = $cachekey;
            $this->filescache[$cachekey] = $fileinfo;
            if (count($this->filecachequeue) > $this->CACHESIZE) {
                // If cache is bigger than the limit, then remove the oldest item we added
                //echo '#'.$this->stat++."Truncating cache from match (".count($this->filecachequeue).'/'.self::CACHESIZE." elems)\n";
                unset($this->filescache[array_shift($this->filecachequeue)]);
            }
        }
        return $fileinfo;
    }

    /**
     * Creates a file record for the current overview record through
     * $sql->addfile(), caches it under the stem/sender key and returns the new
     * file id.
     */
    function addfilecache(&$sql){
        if (!array_key_exists('date', $this->xover)) {
            // Transform the date as a timestamp only if doesn't exists in the array already
            $this->xover['date'] = $this->getTimestamp($this->xover['rawdate']);
        }
        //caching, add here so we don't hit again
        $cachekey = $this->xover['fileinfo']['subjectstem'].strlen($this->xover['fileinfo']['subjectstem']).'|'.
                    $this->xover['sender'].strlen($this->xover['sender']);
        $fileinfo=array();
        $fileinfo['key']=$sql->addfile($this->groupid,$this->xover);
        $fileinfo['collid']=$this->xover['collectioninfo']['id'];
        //if (!array_key_exists($cachekey, $this->filescache))
        $this->filecachequeue[] = $cachekey;
        $this->filescache[$cachekey] = $fileinfo;
        if (count($this->filecachequeue) > $this->CACHESIZE) {
            // If cache is bigger than the limit, then remove the oldest item we added
            //echo '#'.$this->stat++."Truncating cache from add (".count($this->filecachequeue).'/'.self::CACHESIZE." elems)\n";
            unset($this->filescache[array_shift($this->filecachequeue)]);
        }
        return $fileinfo['key'];
    }

    /**
     * Looks up the collection the current record belongs to, first in the
     * in-memory cache and then through $sql->matchtocollection(). Database
     * hits are cached with the same eviction rule as the file cache.
     *
     * Returns whatever matchtocollection() returned (falsy when there is no match).
     */
    function matchcollectioncache(&$sql){
        //match to file caching - added by bibi-pov
        $cachekey = $this->xover['collectioninfo']['stem'].strlen($this->xover['collectioninfo']['stem']).'|'.
                    $this->xover['sender'].strlen($this->xover['sender']);
        if(array_key_exists($cachekey, $this->collectionscache)){
            return $this->collectionscache[$cachekey];
        }

        if (!array_key_exists('date', $this->xover)) {
            // Transform the date as a timestamp only if doesn't exists in the array already
            $this->xover['date'] = $this->getTimestamp($this->xover['rawdate']);
        }
        $a=$sql->matchtocollection($this->groupid,$this->xover);
        if($a){
            $this->collectionscache[$cachekey] = $a;
            $this->collectionscachequeue[] = $cachekey;
            if (count($this->collectionscachequeue) > $this->CACHESIZE) {
                // If cache is bigger than the limit, then remove the oldest item we added
                unset($this->collectionscache[array_shift($this->collectionscachequeue)]);
            }
        }
        return $a;
    }

    /**
     * Creates a collection for the current overview record through
     * $sql->addcollection(), caches it and returns the new collection id.
     *
     * FIXED: the cache key was built from an undefined local $xo, so every
     * new collection was stored under the same key "0|0". The entry could
     * never be found by matchcollectioncache() and each call overwrote the
     * previous one. The key is now built from $this->xover exactly as in
     * matchcollectioncache().
     */
    function addcollectioncache(&$sql){
        if (!array_key_exists('date', $this->xover)) {
            // Transform the date as a timestamp only if doesn't exists in the array already
            $this->xover['date'] = $this->getTimestamp($this->xover['rawdate']);
        }
        //cache so we don't have to hit it again
        $cachekey = $this->xover['collectioninfo']['stem'].strlen($this->xover['collectioninfo']['stem']).'|'.
                    $this->xover['sender'].strlen($this->xover['sender']);
        $a=array();
        $a['cid']=$sql->addcollection($this->groupid,$this->xover);
        $a['nfo']=false;
        $this->collectionscache[$cachekey] = $a;
        $this->collectionscachequeue[] = $cachekey;
        if (count($this->collectionscachequeue) > $this->CACHESIZE) {
            // If cache is bigger than the limit, then remove the oldest item we added
            unset($this->collectionscache[array_shift($this->collectionscachequeue)]);
        }
        return $a['cid'];
    }

    /**
     * Queues "one more part of $b bytes" for file $f and collection $c.
     * Counters are created on first use so no undefined offsets are read.
     */
    function cacheupdate($f,$c,$b){
        if(!isset($this->fileupdatecache[$f])){
            $this->fileupdatecache[$f]=array('bytes'=>0,'parts'=>0);
        }
        if(!isset($this->collupdatecache[$c])){
            $this->collupdatecache[$c]=array('bytes'=>0,'parts'=>0);
        }
        $this->fileupdatecache[$f]['bytes']+=$b;
        $this->fileupdatecache[$f]['parts']++;
        $this->collupdatecache[$c]['bytes']+=$b;
        $this->collupdatecache[$c]['parts']++;
    }

    //queues the reduced header (message id, size, file id, part number) of $xo
    function cacheheader(&$xo){
        $headitem=array();
        $headitem['messageid']=$xo['messageid'];
        $headitem['bytes']=$xo['bytes'];
        $headitem['fileid']=$xo['fileinfo']['fileid'];
        $headitem['part']=$xo['fileinfo']['part'];
        $this->headcache[]=$headitem;
    }

    /**
     * Queues the counters for a new file in the existing collection $collid:
     * one more part, one more file of $filetype, $bytes more size and
     * $filetotalparts more expected parts. Counters are created on first use.
     */
    function cacheupdatecollectionnewfile($filetotalparts,$bytes,$filetype,$collid){
        if(!isset($this->collupdatenewfilecache[$collid])){
            $this->collupdatenewfilecache[$collid]=array(
                'parts'=>0,
                'filetypes'=>array(),
                'size'=>0,
                'totalparts'=>0,
            );
        }
        if(!isset($this->collupdatenewfilecache[$collid]['filetypes'][$filetype])){
            $this->collupdatenewfilecache[$collid]['filetypes'][$filetype]=0;
        }
        $this->collupdatenewfilecache[$collid]['parts']++;
        $this->collupdatenewfilecache[$collid]['filetypes'][$filetype]++;
        $this->collupdatenewfilecache[$collid]['size']+=$bytes;
        $this->collupdatenewfilecache[$collid]['totalparts']+=$filetotalparts;
    }

    /*-----------------------------------------------------------------------------*/
    // Misc.
// Utility Functions
    //------------------------------------------------------------------------------//

    /**
     * Splits a sender address into mailbox, host and personal name.
     *
     * Understands the forms "mailbox@host (Personal Name)",
     * "Personal Name <mailbox@host>" and a bare "mailbox@host".
     *
     * FIXED:
     *  - strpos() results were tested with "!= false", so a character found
     *    at position 0 counted as "not found"; strict tests are used now.
     *  - the variable meant to find a space actually searched for ')'; the
     *    branch is now driven directly by the presence of "( ... )".
     *  - the "-1" slices assumed a space before "(" and "<" and dropped a
     *    character when there was none; the text is sliced exactly and trimmed.
     *  - the mailbox/host split now uses the "@" of the extracted address,
     *    not of the whole input.
     *
     * @param string $adrstring   raw address
     * @param string $defaulthost host used when the address contains no "@"
     * @return array list holding one array with the keys 'mailbox', 'host' and,
     *               when a personal name was found, 'personal'
     */
    function adressDecode($adrstring,$defaulthost)
    {
        $parsestring=trim($adrstring);
        $ka_pos=strpos($parsestring,'('); // find (
        $kz_pos=strpos($parsestring,')'); // find )
        $ha_pos=strpos($parsestring,'<'); // find <
        $hz_pos=strpos($parsestring,'>'); // find >
        $email='';
        $personal='';

        if (($ka_pos!==false) && ($kz_pos!==false) && ($kz_pos>$ka_pos)) {
            // "mailbox@host (Personal Name)"
            $personal=substr($parsestring,$ka_pos+1,$kz_pos-$ka_pos-1);
            $email=trim(substr($parsestring,0,$ka_pos));
            if ($email==='') {
                // "(Personal Name) mailbox@host"
                $email=trim(substr($parsestring,$kz_pos+1));
            }
        } else {
            $email=$adrstring;
        }

        if (($ha_pos!==false) && ($hz_pos!==false) && ($hz_pos>$ha_pos)) {
            // "Personal Name <mailbox@host>" takes precedence
            $email=trim(substr($parsestring,$ha_pos+1,$hz_pos-$ha_pos-1));
            $personal=substr($parsestring,0,$ha_pos);
        }

        $at_pos=strpos($email,'@'); // find @
        if ($at_pos!==false) {
            $mailbox=substr($email,0,$at_pos);
            $host=substr($email,$at_pos+1);
        } else {
            $mailbox=$email;
            $host=$defaulthost;
        }

        // strip one pair of surrounding double quotes from the personal name
        $personal=trim($personal);
        if (substr($personal,0,1) == '"') $personal=substr($personal,1);
        if (substr($personal,-1) == '"') $personal=substr($personal,0,-1);

        $result=array();
        $result["mailbox"]=trim($mailbox);
        $result["host"]=trim($host);
        if ($personal!="") $result["personal"]=$personal;
        $complete=array();
        $complete[]=$result;
        return ($complete);
    }

    /**
     * Decodes "=?charset?Q?...?=" (quoted-printable) and "=?charset?B?...?="
     * (base64) encoded words in a header value. The charset is discarded; no
     * character set conversion is done.
     *
     * FIXED: the POSIX eregi()/eregi_replace() functions no longer exist in
     * PHP 7+, so the equivalent PCRE calls are used (flag i = case-insensitive,
     * flag s = "." also matches newlines). The recursive call went to a global
     * headerDecode() function instead of this method.
     *
     * @param string $value raw header value
     * @return string decoded value, or $value unchanged when nothing is encoded
     */
    function headerDecode($value)
    {
        if (preg_match('/=\?.*\?Q\?.*\?=/is',$value)) {
            $result=preg_replace('/(.*)=\?.*\?Q\?(.*)\?=(.*)/is','$1$2$3',$value);
            if ($result===null) return($value); // PCRE failure: leave the value alone
            if ($value != $result) $result=$this->headerDecode($result);
            $result=str_replace("_"," ",quoted_printable_decode($result));
            return($result);
        }
        if (preg_match('/=\?.*\?B\?.*\?=/is',$value)) {
            $result=preg_replace('/(.*)=\?.*\?B\?(.*)\?=(.*)/is','$1$2$3',$value);
            if ($result===null) return($value); // PCRE failure: leave the value alone
            if ($value != $result) $result=$this->headerDecode($result);
            $result=str_replace("_"," ",base64_decode($result));
            return($result);
        }
        return($value);
    }

    /**
     * Converts a header date such as "Tue, 15 Nov 2005 12:45:26 -0500" or
     * "15 Nov 2005 12:45:26 +0000" into a "YmdHis" string.
     *
     * A numeric "+HHMM" / "-HHMM" zone is folded into the hours and minutes;
     * any other zone text (for example "GMT") is not adjusted.
     *
     * FIXED: strtok(&$value,' ') used call-time pass-by-reference, which is a
     * fatal error since PHP 5.4. The numeric pieces are now cast to int so a
     * missing or malformed field counts as 0 instead of raising an error in
     * the arithmetic below.
     *
     * NOTE(review): the timestamp is built with gmmktime() but formatted with
     * date(), which uses the default timezone - gmdate() may have been
     * intended. Left unchanged because stored values depend on it; confirm.
     *
     * @param string $value raw date header
     * @return string date formatted as YmdHis
     */
    function getTimestamp($value)
    {
        //$value=str_replace("  "," ",$value); // Shouldn't be necessary with strtok
        $d=strtok($value,' ');
        if (substr((string)$d,-1) == ",") {
            // leading "Tue," style weekday: the day of month is the next token
            $date0=strtok(' '); // day
        } else {
            $date0=$d;          // day
        }
        $date1=strtok(' '); // month
        $date2=strtok(' '); // year
        $date3=strtok(' '); // hours:minutes:seconds
        $date4=strtok(' '); // Timezone

        $clock=array_pad(explode(':',(string)$date3),3,0);
        $time0=(int)$clock[0];
        $time1=(int)$clock[1];
        $time2=(int)$clock[2];

        $zone=(string)$date4;
        $sign=substr($zone,0,1);
        if (substr($zone,0,5) == '+0000' || substr($zone,0,5) == '-0000') {
            // Do nothing as it's already a UTC date
        } elseif ($sign == '-') {
            $time0 += (int)substr($zone,1,2);
            $time1 += (int)substr($zone,3,2);
        } elseif ($sign == '+') {
            $time0 -= (int)substr($zone,1,2);
            $time1 -= (int)substr($zone,3,2);
        }

        // an unknown month name maps to 0, as an undefined lookup did before
        $month=isset($this->months[$date1]) ? $this->months[$date1] : 0;

        return date('YmdHis',gmmktime($time0,$time1,$time2,$month,(int)$date0,(int)$date2));
    }

}//class

/*
//this function SHOULD be faster than the one in use, but it is not.
function XOVER2ARRAYold($groupid,$group,$xover){
    $this->xover['group']=$group;
    $this->xover['groupid']=$groupid;
    $elements = explode($this->TAB, $xover); //its faster to use this->tab than parse the string from "" each time.
    $this->xover['id']=$elements[0];
    $this->xover['subject']=$elements[1];
    $this->xover['sender']=$elements[2];
    $this->xover['rawdate']=$elements[3];
    //strip <> from message ID
    $this->xover['messageid']=trim($elements[4]," <>");
    // skip ref
    $this->xover['bytes']=$elements[6];
    // skip lines
    //check only the remaining array members for the xref
    $i = 7;
    while($xref = $elements[$i++]){
        if(substr($xref,0,4)=='Xref'){
            //take it apart and skips first value
            strtok($xref, ' ');
            strtok(' ');
            $xref2=strtok(':');
            if($xref2!=$group){
                $this->xover['ag1']=$xref2;
            }
            if (strtok(' ') === false) break;
            $xref2=strtok(':');
            if(!($xref2==$group)){
                if(empty($xover['ag1'])){
                    $this->xover['ag1']=$xref2;
                }else{
                    $this->xover['ag2']=$xref2;
                }
            }
        }
    }//for
}
*/
?>