// poinfo.cpp -- source listing from the kdesdk package (KBabel)

/*  
  This file is part of KBabel
  Copyright (C) 2002 Stefan Asserhäll <stefan.asserhall@telia.com>
            2003 Stanislav Visnovsky <visnovsky@kde.org>

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.      See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  
*/

#include "poinfo.h"

#include "catalogitem.h"
#include "findoptions.h"
#include "msgfmt.h"
#include "resources.h"

#include <kapplication.h>
#include <kio/netaccess.h>
#include <kstandarddirs.h>

#include <qdatastream.h>
#include <qdatetime.h>
#include <qdict.h>
#include <qfile.h>
#include <qfileinfo.h>
#include <qregexp.h>
#include <qtextcodec.h>

// from libgettext
extern "C"
{
#include "libgettext/fstrcmp.h"
}

#include "libgettext/pofiles.h"
#include "libgettext/tokens.h"

#include <fstream>

using namespace KBabel;

// A PO-file cache item: the summary information gathered for one PO file
// together with the file's modification time at the moment it was gathered.
struct poInfoCacheItem
{
    // Statistics and header fields of the file
    PoInfo info;
    // Modification time recorded with the entry; cacheFind() compares it
    // against the file's current modification time to detect stale entries
    QDateTime lastModified;
};

// Cache of PO-file items, keyed by the URL string passed to cacheSave()
static QDict<poInfoCacheItem> _poInfoCache;

// File name of cache; assigned on the first call of cacheFind()
static QString _poInfoCacheName;

// flag to stop current reading: reset to false at the start of info() and
// findInFile() and polled by both while they scan a file
// NOTE(review): the code that sets it to true is not part of this file section
bool PoInfo::stopStaticRead;

// Set by fastRead(): true when the entry read last had a msgid_plural part
bool PoInfo::_gettextPluralForm;

00072 void PoInfo::cacheRead()
{
    QFile cacheFile( _poInfoCacheName );
    if( cacheFile.open( IO_ReadOnly ) ) {
      QDataStream s( &cacheFile );
      s.setPrintableData ( true );
      QString url;
      int version;

      s >> version;
      if( version == POINFOCACHE_VERSION ) // Only read if correct version
          while( !s.atEnd() ) {
            poInfoCacheItem* item = new poInfoCacheItem;
            s >> url;
            s >> item->info.total;
            s >> item->info.fuzzy;
            s >> item->info.untranslated;
            s >> item->info.project;
            s >> item->info.creation;
            s >> item->info.revision;
            s >> item->info.lastTranslator;
            s >> item->info.languageTeam;
            s >> item->info.mimeVersion;
            s >> item->info.contentType;
            s >> item->info.encoding;
            s >> item->info.others;
            s >> item->info.headerComment;
            s >> item->lastModified;
            _poInfoCache.insert( url, item );
          }
      cacheFile.close();
    }
}

void PoInfo::cacheWrite()
{
    QFile cacheFile( _poInfoCacheName );
    QDictIterator<poInfoCacheItem> it( _poInfoCache ); // iterator for dict

    if( cacheFile.open( IO_WriteOnly ) ) {
      QDataStream s( &cacheFile );
      // FIXME: remove when finally found the problem with corrupted cache
      s.setPrintableData ( true );
      int version = POINFOCACHE_VERSION;

      s << version;
      while( it.current() ) {
          if( QFile::exists( it.currentKey() ) ) {
            poInfoCacheItem* item = it.current();
            s << it.currentKey();
            s << item->info.total;
            s << item->info.fuzzy;
            s << item->info.untranslated;
            s << item->info.project;
            s << item->info.creation;
            s << item->info.revision;
            s << item->info.lastTranslator;
            s << item->info.languageTeam;
            s << item->info.mimeVersion;
            s << item->info.contentType;
            s << item->info.encoding;
            s << item->info.others;
            s << item->info.headerComment;
            s << item->lastModified;
          }
          ++it;
      }
      cacheFile.close();
    }
}

00143 bool PoInfo::cacheFind(const QString url, PoInfo& info)
{
    // Read cache if it has not been read, and set up post routine to write it
    static bool _cacheIsRead = false;
    if( !_cacheIsRead ) {
      _cacheIsRead = true;
        _poInfoCacheName = locateLocal("data", "kbabel/poinfocache");
      cacheRead();
    }

    poInfoCacheItem *item = _poInfoCache.find( url );
    if( item ) {
      QFileInfo fi( url );

      if( fi.lastModified() == item->lastModified ) {
          info = item->info;
          return true;
      }
    }
    return false;
}

00165 void PoInfo::cacheSave(const QString url, PoInfo& info)
{
    poInfoCacheItem *item = new poInfoCacheItem;
    QFileInfo fi( url );

    item->info = info;
    item->lastModified = fi.lastModified();
    _poInfoCache.insert( url, item );
}

00175 QTextCodec* PoInfo::codecForFile(QString gettextHeader)
{
   QString charset;

   QString head = gettextHeader;

   QRegExp r("Content-Type:\\s*\\w+/[-\\w]+;\\s*charset\\s*=\\s*[^\\\"\\n]+");
   int begin=r.search(head);
   int len=r.matchedLength();
   if(begin<0) {
      kdDebug(KBABEL) << "no charset entry found" << endl;
      return 0;
   }  
      
   head = head.mid(begin,len);

   QRegExp regexp("charset *= *([^\\\\\\\"]+)");
   if( regexp.search( head ) > -1 )
   {
       charset = regexp.cap(1);
   }

   QTextCodec* codec=0;

   if(!charset.isEmpty())
   {
      // "CHARSET" is the default charset entry in a template (pot).
      // characters in a template should be either pure ascii or 
      // at least utf8, so utf8-codec can be used for both.
      if( charset == "CHARSET")
      {
          codec=QTextCodec::codecForName("utf8");
          kdDebug(KBABEL) 
              << QString("file seems to be a template: using utf8 encoding.")
              << endl;
      }
      else
      {
         codec=QTextCodec::codecForName(charset.latin1());
      }

      if(!codec)
      {
         kdWarning() << "charset found, but no codec available, using UTF8 instead" << endl;
       codec=QTextCodec::codecForName("utf8");
      }
   }

   return codec;
}

PoInfo PoInfo::headerInfo(const CatalogItem& headerItem)
{
   QStringList header=headerItem.msgstrAsList();

   QStringList::Iterator it;

   PoInfo info;

   // extract information from the header
   for(it=header.begin();it!=header.end();++it)
   {
      if((*it).contains(QRegExp("^\\s*Project-Id-Version\\s*:\\s*.+\\s*$")))
      {
         info.project=(*it).replace(QRegExp("^\\s*Project-Id-Version\\s*:\\s*"),"");

         if(info.project.right(2)=="\\n")
            info.project.remove(info.project.length()-2,2);
         
         info.project=info.project.simplifyWhiteSpace();
      }
      else if((*it).contains(QRegExp("^\\s*POT-Creation-Date\\s*:\\s*.+\\s*$")))
      {
         info.creation=(*it).replace(QRegExp("^\\s*POT-Creation-Date\\s*:\\s*"),"");

         if(info.creation.right(2)=="\\n")
            info.creation.remove(info.creation.length()-2,2);

         info.creation=info.creation.simplifyWhiteSpace();
      }
      else if((*it).contains(QRegExp("^\\s*PO-Revision-Date\\s*:\\s*.+\\s*$")))
      {
         info.revision=(*it).replace(QRegExp("^\\s*PO-Revision-Date\\s*:\\s*"),"");

         if(info.revision.right(2)=="\\n")
            info.revision.remove(info.revision.length()-2,2);

         info.revision=info.revision.simplifyWhiteSpace();
      }
      else if((*it).contains(QRegExp("^\\s*Last-Translator\\s*:\\s*.+\\s*$")))
      {
         info.lastTranslator=(*it).replace(QRegExp("^\\s*Last-Translator\\s*:\\s*"),"");

         if(info.lastTranslator.right(2)=="\\n")
            info.lastTranslator.remove(info.lastTranslator.length()-2,2);

         info.lastTranslator=info.lastTranslator.simplifyWhiteSpace();
      }
      else if((*it).contains(QRegExp("^\\s*Language-Team\\s*:\\s*.+\\s*")))
      {
         info.languageTeam=(*it).replace(QRegExp("^\\s*Language-Team\\s*:\\s*"),"");

         if(info.languageTeam.right(2)=="\\n")
            info.languageTeam.remove(info.languageTeam.length()-2,2);

         info.languageTeam=info.languageTeam.simplifyWhiteSpace();
      }
      else if((*it).contains(QRegExp("^\\s*MIME-Version\\s*:\\s*.+\\s*")))
      {
         info.mimeVersion=(*it).replace(QRegExp("^\\s*MIME-Version\\s*:\\s*"),"");

         if(info.mimeVersion.right(2)=="\\n")
            info.mimeVersion.remove(info.mimeVersion.length()-2,2);

         info.mimeVersion=info.mimeVersion.simplifyWhiteSpace();
      }
      else if((*it).contains(QRegExp("^\\s*Content-Type\\s*:\\s*.+\\s*")))
      {
         info.contentType=(*it).replace(QRegExp("^\\s*Content-Type\\s*:\\s*"),"");

         if(info.contentType.right(2)=="\\n")
            info.contentType.remove(info.contentType.length()-2,2);

         info.contentType=info.contentType.simplifyWhiteSpace();
      }
      else if((*it).contains(QRegExp("^\\s*Content-Transfer-Encoding\\s*:\\s*.+\\s*")))
      {
         info.encoding=(*it).replace(QRegExp("^\\s*Content-Transfer-Encoding\\s*:\\s*"),"");

         if(info.encoding.right(2)=="\\n")
            info.encoding.remove(info.encoding.length()-2,2);

         info.encoding=info.encoding.simplifyWhiteSpace();
      }
      else
      {
          QString line=(*it);

         if(line.right(2)=="\\n")
            line.remove(line.length()-2,2);

         line=line.simplifyWhiteSpace();
         if(!info.others.isEmpty())
             info.others+='\n';

         info.others+=line;
      }


   }

   info.headerComment=headerItem.comment();

   return info;
}


ConversionStatus PoInfo::info(const QString& url, PoInfo& info, QStringList &wordList, bool updateWordList, bool interactive)
{
   stopStaticRead = false;

   if( !updateWordList && PoInfo::cacheFind( url, info ) )
       return OK;

   QString target;
   if(KIO::NetAccess::download(KURL( url ), target))
   {
       QFile file(target);

         // first check file with msgfmt to be sure, it is 
         // syntactically correct
         Msgfmt msgfmt;
         QString output;
         Msgfmt::Status stat = msgfmt.checkSyntax( target , output );
         if(stat == Msgfmt::SyntaxError)
         {
          KIO::NetAccess::removeTempFile(target);
              return PARSE_ERROR;
         }


       std::ifstream* stream = new std::ifstream( file.name().local8Bit());
       if( stream->is_open() )
       {
           CatalogItem temp;

           info.total=0;
           info.fuzzy=0;
           info.untranslated=0;

         GettextFlexLexer* lexer = new GettextFlexLexer( stream  );

         lexer->yylex();
         
           // now parse the rest of the file
           ConversionStatus success=OK;
         
           while( lexer->lastToken != T_EOF && success==OK)
           {
               if( interactive ) kapp->processEvents(10);
             
             if( stopStaticRead )
             {
                delete lexer;
                delete stream;
                return OK;
            }
             
             success=fastRead(temp,lexer,false);
             
               if(success==OK || success==RECOVERED_PARSE_ERROR)
               {
              success=OK;
              
              if( temp.comment().contains("\n#~") ) continue; // skip obsolete
              
                  if( temp.msgid().first().isEmpty()) //header
              {
                  if( temp.isFuzzy() )  temp.removeFuzzy();
                  
                  //find out the codec
                  QTextCodec* codec = codecForFile( temp.msgstr().first() );
                  if( !codec ) return PARSE_ERROR;
                  
                  // convert from UTF-8 using codec
                  temp.setComment( codec->toUnicode(temp.comment().utf8()) );
                  temp.setMsgstr( codec->toUnicode(temp.msgstr().first().utf8()) );
                  
                  PoInfo infoCounts = info;
                  info=PoInfo::headerInfo(temp);
                  info.total = infoCounts.total;
                  info.fuzzy = infoCounts.fuzzy;
                  info.untranslated = infoCounts.untranslated;
                  continue; // do not update counters and word list for header
              }
                          
                  info.total++;

                  if(temp.isFuzzy())
                     info.fuzzy++;
                  else if(temp.isUntranslated())
                     info.untranslated++;
                 
              if( updateWordList )
              {
                // FIXME: should care about plural forms in msgid
                QString st = temp.msgid().first().simplifyWhiteSpace().lower();
                QStringList sl = QStringList::split( ' ', st );
                while(!sl.isEmpty())
                {
                  QString w = sl.first();
                  sl.pop_front();
                  if( !wordList.contains(w) ) wordList.append( w );
                }
                st = temp.msgstr().join(" " ).simplifyWhiteSpace().lower();
                sl = QStringList::split( ' ', st );
                while(!sl.isEmpty())
                {
                  QString w = sl.first();
                  sl.pop_front();
                  if( !wordList.contains(w) ) wordList.append( w );
                }
                st = temp.comment().simplifyWhiteSpace().lower();
                sl = QStringList::split( ' ', st );
                while(!sl.isEmpty())
                {
                  QString w = sl.first();
                  sl.pop_front();
                  if( !wordList.contains(w) ) wordList.append( w );
                }
                }
            }
           }

         delete lexer;
         delete stream;

           if(success==PARSE_ERROR)
           {
             KIO::NetAccess::removeTempFile(target);
               return PARSE_ERROR;
           }
       }
       else
       {
          delete stream;
          KIO::NetAccess::removeTempFile(target);
          return NO_PERMISSIONS;
       }

      KIO::NetAccess::removeTempFile(target);
      if( target == url )
          PoInfo::cacheSave( url, info );
        return OK;
   }
   else
   {
      return OS_ERROR;
   }

   return OK;
}

bool PoInfo::findInFile( const QString& url, FindOptions options )
{
   enum {Begin, Comment, Msgid, Msgstr} part = Begin;
   
   stopStaticRead = false;
   QString target;
   if(KIO::NetAccess::download(KURL( url ), target))
   {
       std::ifstream* stream = new std::ifstream( target.local8Bit()); 
       if(stream->is_open())
       {
           KIO::NetAccess::removeTempFile(target);
         
         GettextFlexLexer* lexer = new GettextFlexLexer( stream );

         lexer->yylex();

           // prepare the search
         
         QString searchStr = options.findStr;
         QRegExp regexp( searchStr );
         
         if( options.isRegExp ) 
            regexp.setCaseSensitive( options.caseSensitive );

           // first read header
         CatalogItem temp;
         
         ConversionStatus status = fastRead( temp, lexer, true );
         if( status != OK || !temp.msgid().first().isEmpty() ) 
         {
            delete lexer;
            delete stream;
            return false; // header is not at the beginning, broken file
         }

         QTextCodec* codec = codecForFile( temp.msgstr().first() );
         if( !codec ) 
         {
            return false;
         }
         
         // now parse the rest of the file
         QString text;
         int pos,len;
         
           while(lexer->lastToken != T_EOF)
           {
             switch( lexer->lastToken ) {
                 case T_COMMENT: {
                  part = Comment;
                  if( !options.inComment ) break;
                  text = codec->toUnicode(lexer->YYText()); 
                  if( options.isRegExp )
                      pos=regexp.search(text, 0 );
                  else 
                      pos=text.find(searchStr,0,options.caseSensitive);
                  if( pos >= 0)
                  {
                      if( options.wholeWords) {
                        len = searchStr.length();
                        QString pre = text.mid(pos-1,1);
                        QString post = text.mid(pos+len,1);
                        if( !pre.contains( QRegExp("[a-zA-Z0-9]")) &&
                            !post.contains( QRegExp("[a-zA-Z0-9]") )
                        ) {
                            delete lexer;
                            delete stream;
                            return true;
                        }
                      }
                      else {
                        delete lexer;
                        delete stream;
                        return true;
                      };
                  }
                  break;
               }
               case T_STRING: {
                  if( part == Msgid && !options.inMsgid ) break;
                  if( part == Msgstr && !options.inMsgstr ) break;
                  
                  text = codec->toUnicode(lexer->YYText()); 
                  
                  if( options.ignoreContextInfo )
                  {
                      pos = options.contextInfo.search(text);
                      len = options.contextInfo.matchedLength();
                      if( pos >= 0 )
                        text.remove( pos, len );
                  }
                
                  if( options.ignoreAccelMarker )
                  {
                      pos = text.find( options.accelMarker );
                      if( pos >= 0 )
                        text.remove( pos, 1 );
                  }
                  
                  if( options.isRegExp )
                      pos=regexp.search(text, 0 );
                  else 
                      pos=text.find(searchStr,0,options.caseSensitive);

                  if( pos >= 0)
                  {
                      if( options.wholeWords) {
                        len = searchStr.length();
                        QString pre = text.mid(pos-1,1);
                        QString post = text.mid(pos+len,1);
                        if( !pre.contains( QRegExp("[a-zA-Z0-9]")) &&
                            !post.contains( QRegExp("[a-zA-Z0-9]") )
                        ) {
                            delete lexer;
                            delete stream;
                            return true;
                        }
                      }
                      else {
                        delete lexer;
                        delete stream;
                        return true;
                      };
                  }
                  break;
               }
               case T_MSGSTR: {
                  part = Msgstr;
                  break;
               }
               case T_MSGID: 
               case T_MSGIDPLURAL: {
                  kapp->processEvents(10);
                  
                  // if stopped, return not found
                  if( stopStaticRead ) 
                  {
                      delete lexer;
                      delete stream;
                      return false;
                  }
                  part = Msgid;
                  break;
               }
             }
             lexer->yylex();
           }
         delete lexer;
         delete stream;
       }
    }
    return false;
}

// this does not like any incorrect files
00634 ConversionStatus PoInfo::fastRead( CatalogItem& item, GettextFlexLexer *lexer, bool storeText)
{
   item.clear();
   _gettextPluralForm = false;

    // comment
    if( lexer->lastToken == T_COMMENT )
    {
      QString _comment = QString::fromUtf8(lexer->YYText());
      while( lexer->yylex() == T_COMMENT )
          _comment += "\n"+QString::fromUtf8(lexer->YYText());
      item.setComment( _comment );
//    kdDebug(KBABEL) << "Comment: " << _comment << endl;
    }

    //obsolete
    if( lexer->lastToken == T_OBSOLETE ) {
      lexer->yylex();
      item.setComment("#~\n#~");
      return OK;
    }
        
    // msgid
    if( lexer->lastToken != T_MSGID ) return PARSE_ERROR;
    
    if( lexer->yylex() != T_STRING ) return PARSE_ERROR;
    QStringList msgids = item.msgid();
    QStringList::Iterator it = msgids.begin();
    *it = QString::fromUtf8(lexer->YYText());
    if( storeText )
      while( lexer->yylex() == T_STRING )
          (*it) += ("\n"+ QString::fromUtf8(lexer->YYText()) );
    else {
      if( lexer->yylex() == T_STRING ) // this is not header
      {
          *it = "SKIPPED";
          while( lexer->yylex() == T_STRING );
      }
    }
    item.setMsgid( msgids );
    
//    kdDebug(KBABEL) << "Msgid: " << *it << endl;

    if( lexer->lastToken == T_MSGIDPLURAL ) 
    {
      _gettextPluralForm = true;
      if( lexer->yylex() != T_STRING ) return PARSE_ERROR;
      QStringList msgids = item.msgid();
      it = msgids.fromLast();
      *it = QString::fromUtf8(lexer->YYText());
      if( storeText ) 
          while( lexer->yylex() == T_STRING )
            (*it)+="\n"+ QString::fromUtf8(lexer->YYText());
      else while( lexer->yylex() == T_STRING );
      item.setMsgid( msgids );
//    kdDebug(KBABEL) << "Msgid_plural: " << *it << endl;
    }
    
    // msgstr
    if( lexer->lastToken != T_MSGSTR ) return PARSE_ERROR;

    if( !_gettextPluralForm )
    {
      if( lexer->yylex() != T_STRING ) return PARSE_ERROR;
      
      QStringList msgstrs = item.msgstr();
      it = msgstrs.begin();
      *it = QString::fromUtf8(lexer->YYText());
      if( storeText || item.msgid().first().isEmpty() ) // if we should store the text or it is a header
          while( lexer->yylex() == T_STRING )
            (*it)+= ("\n"+ QString::fromUtf8(lexer->YYText()));
      else 
      if( lexer->yylex() == T_STRING ) // check next token, whether it is really translated
      {
          *it = "SKIPPED";
          while( lexer->yylex() == T_STRING );
      }
      item.setMsgstr( msgstrs );
//    kdDebug(KBABEL) << "Msgstr: " << *it << endl;
    } 
    else 
    {
      QStringList msgstrs = item.msgstr();
      QString s = QString::fromUtf8(lexer->YYText());
      while( lexer->lastToken == T_MSGSTR && s.contains( QRegExp("^msgstr\\[[0-9]+\\]" ) ) )
      {
          if( lexer->yylex() != T_STRING ) return PARSE_ERROR;
          it = msgstrs.fromLast();
          *it = QString::fromUtf8(lexer->YYText());
          
          if( storeText )
            do {
                (*it)+="\n"+QString::fromUtf8(lexer->YYText());
            } while( lexer->yylex() == T_STRING );
          else while( lexer->yylex() == T_STRING );
//        kdDebug(KBABEL) << "Msgstr: " << *it << endl;
          s = QString::fromUtf8(lexer->YYText());
      } 
      item.setMsgstr( msgstrs );
    }
    
    return OK;
}

// Listing generated by Doxygen 1.6.0