kdecore Library API Documentation

kurl.cpp

00001 /*
00002     Copyright (C) 1999 Torben Weis <weis@kde.org>
00003 
00004     This library is free software; you can redistribute it and/or
00005     modify it under the terms of the GNU Library General Public
00006     License as published by the Free Software Foundation; either
00007     version 2 of the License, or (at your option) any later version.
00008 
00009     This library is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012     Library General Public License for more details.
00013 
00014     You should have received a copy of the GNU Library General Public License
00015     along with this library; see the file COPYING.LIB.  If not, write to
00016     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00017     Boston, MA 02111-1307, USA.
00018 */
00019 
00020 #include "kurl.h"
00021 
00022 #ifndef KDE_QT_ONLY
00023 #include <kdebug.h>
00024 #include <kglobal.h>
00025 #include <kidna.h>
00026 #include <kprotocolinfo.h>
00027 #endif
00028 
00029 #include <stdio.h>
00030 #include <assert.h>
00031 #include <ctype.h>
00032 #include <stdlib.h>
00033 #include <unistd.h>
00034 
00035 #include <qurl.h>
00036 #include <qdir.h>
00037 #include <qstringlist.h>
00038 #include <qregexp.h>
00039 #include <qstylesheet.h>
00040 #include <qmap.h>
00041 #include <qtextcodec.h>
00042 #include <qmutex.h>
00043 
// Protocol name this file assigns to URLs that start with '/' (see parse())
// or that end up with no protocol after a successful parseURL().
00044 static const QString fileProt = "file";
00045 
00046 static QTextCodec * codecForHint( int encoding_hint /* not 0 ! */ )
00047 {
00048     return QTextCodec::codecForMib( encoding_hint );
00049 }
00050 
00051 static QString encode( const QString& segment, bool encode_slash, int encoding_hint )
00052 {
00053   const char *encode_string;
00054   if (encode_slash)
00055     encode_string = "<>#@\"&%?={}|^~[]\'`\\:+/";
00056   else
00057     encode_string = "<>#@\"&%?={}|^~[]\'`\\:+";
00058 
00059   QCString local;
00060   if (encoding_hint==0)
00061     local = segment.local8Bit();
00062   else
00063   {
00064       QTextCodec * textCodec = codecForHint( encoding_hint );
00065       if (!textCodec)
00066           local = segment.local8Bit();
00067       else
00068           local = textCodec->fromUnicode( segment );
00069   }
00070 
00071   int old_length = local.length();
00072 
00073   if ( !old_length )
00074     return segment.isNull() ? QString::null : QString(""); // differentiate null and empty
00075 
00076   // a worst case approximation
00077   QChar *new_segment = new QChar[ old_length * 3 + 1 ];
00078   int new_length = 0;
00079 
00080   for ( int i = 0; i < old_length; i++ )
00081   {
00082     // 'unsave' and 'reserved' characters
00083     // according to RFC 1738,
00084     // 2.2. URL Character Encoding Issues (pp. 3-4)
00085     // WABA: Added non-ascii
00086     unsigned char character = local[i];
00087     if ( (character <= 32) || (character >= 127) ||
00088          strchr(encode_string, character) )
00089     {
00090       new_segment[ new_length++ ] = '%';
00091 
00092       unsigned int c = character / 16;
00093       c += (c > 9) ? ('A' - 10) : '0';
00094       new_segment[ new_length++ ] = c;
00095 
00096       c = character % 16;
00097       c += (c > 9) ? ('A' - 10) : '0';
00098       new_segment[ new_length++ ] = c;
00099 
00100     }
00101     else
00102       new_segment[ new_length++ ] = local[i];
00103   }
00104 
00105   QString result = QString(new_segment, new_length);
00106   delete [] new_segment;
00107   return result;
00108 }
00109 
00110 static QString encodeHost( const QString& segment, bool encode_slash, int encoding_hint )
00111 {
00112   // Hostnames are encoded differently
00113   // we use the IDNA transformation instead
00114 
00115   // Note: when merging qt-addon, use QResolver::domainToAscii here
00116 #ifndef KDE_QT_ONLY
00117   Q_UNUSED( encode_slash );
00118   Q_UNUSED( encoding_hint );
00119   return KIDNA::toAscii(segment);
00120 #else
00121   return encode(segment, encode_slash, encoding_hint);
00122 #endif
00123 }
00124 
// Returns the numeric value (0..15) of the hexadecimal digit '_char',
// accepting both upper and lower case, or -1 if it is not a hex digit.
static int hex2int( unsigned int _char )
{
  if ( _char >= '0' && _char <= '9' )
    return _char - '0';

  if ( _char >= 'a' && _char <= 'f' )
    return 10 + (_char - 'a');

  if ( _char >= 'A' && _char <= 'F' )
    return 10 + (_char - 'A');

  return -1;
}
00135 
00136 // WABA: The result of lazy_encode isn't usable for a URL which
00137 // needs to satisfy RFC requirements. However, the following
00138 // operation will make it usable again:
00139 //      encode(decode(...))
00140 //
00141 // As a result, url.prettyURL() does not produce an RFC-compliant
00142 // URL, but the following sequence does:
00143 //      KURL(url.prettyURL()).url()
00144 
00145 
00146 static QString lazy_encode( const QString& segment )
00147 {
00148   int old_length = segment.length();
00149 
00150   if ( !old_length )
00151     return QString::null;
00152 
00153   // a worst case approximation
00154   QChar *new_segment = new QChar[ old_length * 3 + 1 ];
00155   int new_length = 0;
00156 
00157   for ( int i = 0; i < old_length; i++ )
00158   {
00159     unsigned int character = segment[i].unicode(); // Don't use latin1()
00160                                                    // It returns 0 for non-latin1 values
00161     // Small set of really ambiguous chars
00162     if ((character < 32) ||  // Low ASCII
00163         ((character == '%') && // The escape character itself
00164            (i+2 < old_length) && // But only if part of a valid escape sequence!
00165           (hex2int(segment[i+1].unicode())!= -1) &&
00166           (hex2int(segment[i+2].unicode())!= -1)) ||
00167         (character == '?') || // Start of query delimiter
00168         (character == '@') || // Username delimiter
00169         (character == '#') || // Start of reference delimiter
00170         ((character == 32) && (i+1 == old_length))) // A trailing space
00171     {
00172       new_segment[ new_length++ ] = '%';
00173 
00174       unsigned int c = character / 16;
00175       c += (c > 9) ? ('A' - 10) : '0';
00176       new_segment[ new_length++ ] = c;
00177 
00178       c = character % 16;
00179       c += (c > 9) ? ('A' - 10) : '0';
00180       new_segment[ new_length++ ] = c;
00181     }
00182     else
00183     new_segment[ new_length++ ] = segment[i];
00184   }
00185 
00186   QString result = QString(new_segment, new_length);
00187   delete [] new_segment;
00188   return result;
00189 }
00190 
// Percent-decodes 'segment' and, in the same pass, builds a normalized
// re-encoded form of it.
//
//   segment        input text, possibly containing %XX escapes
//   decoded        out: set to null first; receives the decoded text only
//                  when updateDecoded is true
//   encoded        out: starts as a copy of 'segment' (and stays that way
//                  for empty input); otherwise rebuilt so that stray '%'
//                  signs, bytes <= ' ' and bytes > 127 are written as %XX
//                  while valid %XX escapes are copied through as they are
//   encoding_hint  MIB of the codec used for the 8-bit conversion; 0, or a
//                  MIB for which no codec is found, selects the locale codec
//   updateDecoded  when false, 'decoded' is left null and a %00 escape does
//                  not stop the scan
00191 static void decode( const QString& segment, QString &decoded, QString &encoded, int encoding_hint=0, bool updateDecoded = true )
00192 {
00193   decoded = QString::null;
00194   encoded = segment;
00195 
00196   int old_length = segment.length();
00197   if ( !old_length )
00198     return;
00199 
00200   QTextCodec *textCodec = 0;
00201   if (encoding_hint)
00202       textCodec = codecForHint( encoding_hint );
00203 
00204   if (!textCodec)
00205       textCodec = QTextCodec::codecForLocale();
00206 
00207   QCString csegment = textCodec->fromUnicode(segment);
00208   // Check if everything went ok
// (round trip: converting the bytes back must reproduce the input exactly)
00209   if (textCodec->toUnicode(csegment) != segment)
00210   {
00211       // Uh oh
00212       textCodec = codecForHint( 106 ); // Fall back to utf-8
00213       csegment = textCodec->fromUnicode(segment);
00214   } 
// From here on, lengths and indices refer to the 8-bit form, not to 'segment'.
00215   old_length = csegment.length();
00216 
// new_length counts bytes written to new_segment (decoded form),
// new_length2 counts characters written to new_usegment (re-encoded form).
00217   int new_length = 0;
00218   int new_length2 = 0;
00219 
00220   // make a copy of the old one
// The decoded form never grows; the re-encoded form can grow up to 3x.
00221   char *new_segment = new char[ old_length + 1 ];
00222   QChar *new_usegment = new QChar[ old_length * 3 + 1 ];
00223 
00224   int i = 0;
00225   while( i < old_length )
00226   {
00227     bool bReencode = false;
// i is advanced here, so below it indexes the byte AFTER 'character'.
00228     unsigned char character = csegment[ i++ ];
00229     if ((character <= ' ') || (character > 127))
00230        bReencode = true;
00231 
// Optimistically copy the byte; the bReencode branch below undoes this.
00232     new_usegment [ new_length2++ ] = character;
00233     if (character == '%' )
00234     {
// a and b are the values of the two hex digits following '%', or -1 when
// fewer than two bytes remain or a digit is not hexadecimal.
00235       int a = i+1 < old_length ? hex2int( csegment[i] ) : -1;
00236       int b = i+1 < old_length ? hex2int( csegment[i+1] ) : -1;
00237       if ((a == -1) || (b == -1)) // Only replace if sequence is valid
00238       {
00239          // Contains stray %, make sure to re-encode!
00240          bReencode = true;
00241       }
00242       else
00243       {
00244          // Valid %xx sequence
00245          character = a * 16 + b; // Replace with value of %dd
// Note: on this break the '%' copied above is already in new_usegment.
00246          if (!character && updateDecoded)
00247             break; // Stop at %00
00248 
// Copy the two hex digits verbatim into the re-encoded form and skip them.
00249          new_usegment [ new_length2++ ] = (unsigned char) csegment[i++];
00250          new_usegment [ new_length2++ ] = (unsigned char) csegment[i++];
00251       }
00252     }
00253     if (bReencode)
00254     {
// Drop the raw byte copied above and write it as %XX instead.
00255       new_length2--;
00256       new_usegment [ new_length2++ ] = '%';
00257 
00258       unsigned int c = character / 16;
00259       c += (c > 9) ? ('A' - 10) : '0';
00260       new_usegment[ new_length2++ ] = c;
00261 
00262       c = character % 16;
00263       c += (c > 9) ? ('A' - 10) : '0';
00264       new_usegment[ new_length2++ ] = c;
00265     }
00266 
// 'character' is either the original byte or the value of a decoded %XX.
00267     new_segment [ new_length++ ] = character;
00268   }
00269   new_segment [ new_length ] = 0;
00270 
00271   encoded = QString( new_usegment, new_length2);
00272 
00273   // Encoding specified
00274   if (updateDecoded)
00275   {
// NOTE(review): setRawData/resetRawData appear to be used so the array can
// refer to new_segment for the toUnicode() call without copying — confirm
// against the Qt 3 QByteArray documentation.
00276      QByteArray array;
00277      array.setRawData(new_segment, new_length);
00278      decoded = textCodec->toUnicode( array, new_length );
00279      array.resetRawData(new_segment, new_length);
00280      QCString validate = textCodec->fromUnicode(decoded);
00281 
// If converting back does not reproduce the decoded bytes, the codec could
// not represent them; fall back to the local 8-bit interpretation.
00282      if (strcmp(validate.data(), new_segment) != 0)
00283      {
00284         decoded = QString::fromLocal8Bit(new_segment, new_length);
00285      }
00286   }
00287 
00288   delete [] new_segment;
00289   delete [] new_usegment;
00290 }
00291 
00292 static QString decode(const QString &segment, int encoding_hint = 0)
00293 {
00294   QString result;
00295   QString tmp;
00296   decode(segment, result, tmp, encoding_hint);
00297   return result;
00298 }
00299 
00300 static QString cleanpath(const QString &_path, bool cleanDirSeparator, bool decodeDots)
00301 {
00302   if (_path.isEmpty()) return QString::null;
00303   
00304   if (_path[0] != '/')
00305      return _path; // Don't mangle mailto-style URLs
00306   
00307   QString path = _path;
00308 
00309   int len = path.length();
00310 
00311   if (decodeDots)
00312   {
00313 #ifndef KDE_QT_ONLY
00314      static const QString &encodedDot = KGlobal::staticQString("%2e");
00315 #else
00316      QString encodedDot("%2e");
00317 #endif
00318      if (path.find(encodedDot, 0, false) != -1)
00319      {
00320 #ifndef KDE_QT_ONLY
00321         static const QString &encodedDOT = KGlobal::staticQString("%2E"); // Uppercase!
00322 #else
00323         QString encodedDOT("%2E");
00324 #endif
00325         path.replace(encodedDot, ".");
00326         path.replace(encodedDOT, ".");
00327         len = path.length();
00328      }
00329   }
00330 
00331   bool slash = (len && path[len-1] == '/') ||
00332                (len > 1 && path[len-2] == '/' && path[len-1] == '.');
00333 
00334   // The following code cleans up directory path much like
00335   // QDir::cleanDirPath() except it can be made to ignore multiple
00336   // directory separators by setting the flag to false.  That fixes
00337   // bug# 15044, mail.altavista.com and other similar brain-dead server
00338   // implementations that do not follow what has been specified in
00339   // RFC 2396!! (dA)
00340   QString result;
00341   int cdUp, orig_pos, pos;
00342 
00343   cdUp = 0;
00344   pos = orig_pos = len;
00345   while ( pos && (pos = path.findRev('/',--pos)) != -1 )
00346   {
00347     len = orig_pos - pos - 1;
00348     if ( len == 2 && path[pos+1] == '.' && path[pos+2] == '.' )
00349       cdUp++;
00350     else
00351     {
00352       // Ignore any occurrences of '.'
00353       // This includes entries that simply do not make sense like /..../
00354       if ( (len || !cleanDirSeparator) &&
00355            (len != 1 || path[pos+1] != '.' ) )
00356       {
00357           if ( !cdUp )
00358               result.prepend(path.mid(pos, len+1));
00359           else
00360               cdUp--;
00361       }
00362     }
00363     orig_pos = pos;
00364   }
00365 
00366   if ( result.isEmpty() )
00367     result = "/";
00368   else if ( slash && result[result.length()-1] != '/' )
00369        result.append('/');
00370 
00371   return result;
00372 }
00373 
00374 bool KURL::isRelativeURL(const QString &_url)
00375 {
00376   int len = _url.length();
00377   if (!len) return true; // Very short relative URL.
00378   const QChar *str = _url.unicode();
00379 
00380   // Absolute URL must start with alpha-character
00381   if (!isalpha(str[0].latin1()))
00382      return true; // Relative URL
00383 
00384   for(int i = 1; i < len; i++)
00385   {
00386      char c = str[i].latin1(); // Note: non-latin1 chars return 0!
00387      if (c == ':')
00388         return false; // Absolute URL
00389 
00390      // Protocol part may only contain alpha, digit, + or -
00391      if (!isalpha(c) && !isdigit(c) && (c != '+') && (c != '-'))
00392         return true; // Relative URL
00393   }
00394   // URL did not contain ':'
00395   return true; // Relative URL
00396 }
00397 
00398 KURL::List::List(const KURL &url)
00399 {
00400     append( url );
00401 }
00402 
00403 KURL::List::List(const QStringList &list)
00404 {
00405   for (QStringList::ConstIterator it = list.begin();
00406        it != list.end();
00407        it++)
00408     {
00409       append( KURL(*it) );
00410     }
00411 }
00412 
00413 QStringList KURL::List::toStringList() const
00414 {
00415   QStringList lst;
00416    for( KURL::List::ConstIterator it = begin();
00417         it != end();
00418         it++)
00419    {
00420       lst.append( (*it).url() );
00421    }
00422    return lst;
00423 }
00424 
00425 
00426 KURL::KURL()
00427 {
00428   reset();
00429 }
00430 
00431 KURL::~KURL()
00432 {
00433 }
00434 
00435 
00436 KURL::KURL( const QString &url, int encoding_hint )
00437 {
00438   reset();
00439   parse( url, encoding_hint );
00440 }
00441 
00442 KURL::KURL( const char * url, int encoding_hint )
00443 {
00444   reset();
00445   parse( QString::fromLatin1(url), encoding_hint );
00446 }
00447 
00448 KURL::KURL( const QCString& url, int encoding_hint )
00449 {
00450   reset();
00451   parse( QString::fromLatin1(url), encoding_hint );
00452 }
00453 
00454 KURL::KURL( const KURL& _u )
00455 {
00456   *this = _u;
00457 }
00458 
00459 QDataStream & operator<< (QDataStream & s, const KURL & a)
00460 {
00461   QString QueryForWire=a.m_strQuery_encoded;
00462   if (!a.m_strQuery_encoded.isNull())
00463     QueryForWire.prepend("?");
00464 
00465     s << a.m_strProtocol << a.m_strUser << a.m_strPass << a.m_strHost
00466       << a.m_strPath << a.m_strPath_encoded << QueryForWire << a.m_strRef_encoded
00467       << Q_INT8(a.m_bIsMalformed ? 1 : 0) << a.m_iPort;
00468     return s;
00469 }
00470 
00471 QDataStream & operator>> (QDataStream & s, KURL & a)
00472 {
00473     Q_INT8 malf;
00474     QString QueryFromWire;
00475     s >> a.m_strProtocol >> a.m_strUser >> a.m_strPass >> a.m_strHost
00476       >> a.m_strPath >> a.m_strPath_encoded >> QueryFromWire >> a.m_strRef_encoded
00477       >> malf >> a.m_iPort;
00478     a.m_bIsMalformed = (malf != 0);
00479 
00480     if ( QueryFromWire.isEmpty() )
00481       a.m_strQuery_encoded = QString::null;
00482     else
00483       a.m_strQuery_encoded = QueryFromWire.mid(1);
00484 
00485     a.m_iUriMode = KURL::uriModeForProtocol( a.m_strProtocol );
00486 
00487     return s;
00488 }
00489 
00490 #ifndef QT_NO_NETWORKPROTOCOL
00491 KURL::KURL( const QUrl &u )
00492 {
00493   *this = u;
00494 }
00495 #endif
00496 
// Constructs a URL by resolving '_rel_url' against the base URL '_u'.
//
//   _u             base URL
//   _rel_url       relative (or absolute) URL text to resolve
//   encoding_hint  forwarded to decode() and to the KURL(QString, int)
//                  constructor
//
// Cases, in the order they are tested:
//   - '_u' has sub-URLs: it is split, the LAST sub-URL is used as the base,
//     and the list is joined again.
//   - '_rel_url' is empty: the result is a copy of '_u'.
//   - '_rel_url' starts with '#': copy of '_u' with the HTML reference
//     replaced.
//   - '_rel_url' is relative (isRelativeURL()): it is combined with '_u'
//     and the resulting path is cleaned.
//   - otherwise '_rel_url' is parsed as an absolute URL; user and password
//     are carried over from '_u' when host and protocol are the same and
//     the new URL has no user of its own.
00497 KURL::KURL( const KURL& _u, const QString& _rel_url, int encoding_hint )
00498 {
00499   if (_u.hasSubURL()) // Operate on the last suburl, not the first
00500   {
00501     KURL::List lst = split( _u );
00502     KURL u(lst.last(), _rel_url, encoding_hint);
00503     lst.remove( lst.last() );
00504     lst.append( u );
00505     *this = join( lst );
00506     return;
00507   }
00508   // WORKAROUND THE RFC 1606 LOOPHOLE THAT ALLOWS
00509   // http:/index.html AS A VALID SYNTAX FOR RELATIVE
00510   // URLS. ( RFC 2396 section 5.2 item # 3 )
// If the base has a host and rUrl begins with the base's protocol followed
// by ':' but NOT by "://", the "protocol:" prefix is stripped so that the
// remainder is handled as a relative URL below.
00511   QString rUrl = _rel_url;
00512   int len = _u.m_strProtocol.length();
00513   if ( !_u.m_strHost.isEmpty() && !rUrl.isEmpty() &&
00514        rUrl.find( _u.m_strProtocol, 0, false ) == 0 &&
00515        rUrl[len] == ':' && (rUrl[len+1] != '/' ||
00516        (rUrl[len+1] == '/' && rUrl[len+2] != '/')) )
00517   {
00518     rUrl.remove( 0, rUrl.find( ':' ) + 1 );
00519   }
00520 
00521   if ( rUrl.isEmpty() )
00522   {
00523     *this = _u;
00524   }
00525   else if ( rUrl[0] == '#' )
00526   {
00527     *this = _u;
00528     QString ref = decode(rUrl.mid(1), encoding_hint);
00529     if ( ref.isNull() )
00530         ref = ""; // we know there was an (empty) html ref, we saw the '#'
00531     setHTMLRef( ref );
00532   }
00533   else if ( isRelativeURL( rUrl) )
00534   {
// Start from the base and drop its query and reference.
00535     *this = _u;
00536     m_strQuery_encoded = QString::null;
00537     m_strRef_encoded = QString::null;
00538     if ( rUrl[0] == '/')
00539     {
// Relative URL with an absolute path: the base path is discarded.
// A leading "//" additionally discards the base host.
00540         if ((rUrl.length() > 1) && (rUrl[1] == '/'))
00541         {
00542            m_strHost = QString::null;
00543         }
00544         m_strPath = QString::null;
00545         m_strPath_encoded = QString::null;
00546     }
00547     else if ( rUrl[0] != '?' )
00548     {
// Relative path: cut the base path (and its encoded form, if any) after
// the last '/', so that rUrl replaces the last path component.
00549        int pos = m_strPath.findRev( '/' );
00550        if (pos >= 0)
00551           m_strPath.truncate(pos);
00552        m_strPath += '/';
00553        if (!m_strPath_encoded.isEmpty())
00554        {
00555           pos = m_strPath_encoded.findRev( '/' );
00556           if (pos >= 0)
00557              m_strPath_encoded.truncate(pos);
00558           m_strPath_encoded += '/';
00559        }
00560     }
00561     else
00562     {
// rUrl is only a query ("?..."): keep the base path, defaulting to "/".
00563        if ( m_strPath.isEmpty() )
00564           m_strPath = '/';
00565     }
// Re-parse the textual concatenation of the adjusted base and rUrl.
00566     KURL tmp( url() + rUrl, encoding_hint);
00567     *this = tmp;
00568     cleanPath(false);
00569   }
00570   else
00571   {
00572     KURL tmp( rUrl, encoding_hint);
00573     *this = tmp;
00574     // Preserve userinfo if applicable.
00575     if (!_u.m_strUser.isEmpty() && m_strUser.isEmpty() && (_u.m_strHost == m_strHost) && (_u.m_strProtocol == m_strProtocol))
00576     {
00577        m_strUser = _u.m_strUser;
00578        m_strPass = _u.m_strPass;
00579     }
00580     cleanPath(false);
00581   }
00582 }
00583 
00584 void KURL::reset()
00585 {
00586   m_strProtocol = QString::null;
00587   m_strUser = QString::null;
00588   m_strPass = QString::null;
00589   m_strHost = QString::null;
00590   m_strPath = QString::null;
00591   m_strPath_encoded = QString::null;
00592   m_strQuery_encoded = QString::null;
00593   m_strRef_encoded = QString::null;
00594   m_bIsMalformed = true;
00595   m_iPort = 0;
00596   m_iUriMode = Auto;
00597 }
00598 
00599 bool KURL::isEmpty() const
00600 {
00601   return (m_strPath.isEmpty() && m_strProtocol.isEmpty());
00602 }
00603 
00604 void KURL::parse( const QString& _url, int encoding_hint )
00605 {
00606     if ( _url.isEmpty() || m_iUriMode == Invalid )
00607     {
00608     m_strProtocol = _url;
00609     m_iUriMode = Invalid;
00610     return;
00611     }
00612 
00613     const QChar* buf = _url.unicode();
00614     const QChar* orig = buf;
00615     uint len = _url.length();
00616     uint pos = 0;
00617 
00618     // Node 1: Accept alpha or slash
00619     QChar x = buf[pos++];
00620     if ( x == '/' )
00621     {
00622     // A slash means we immediately proceed to parse it as a file URL.
00623     m_iUriMode = URL;
00624     m_strProtocol = fileProt;
00625     parseURL( _url, encoding_hint );
00626     return;
00627     }
00628     if ( !isalpha( (int)x ) )
00629     goto NodeErr;
00630 
00631     // Node 2: Accept any amount of (alpha|digit|'+'|'-')
00632     // '.' is not currently accepted, because current KURL may be confused.
00633     // Proceed with :// :/ or :
00634     while( pos < len && (isalpha((int)buf[pos]) || isdigit((int)buf[pos]) ||
00635              buf[pos] == '+' || buf[pos] == '-')) pos++;
00636 
00637     if (pos < len && buf[pos] == ':' )
00638     {
00639     m_strProtocol = QString( orig, pos ).lower();
00640     if ( m_iUriMode == Auto )
00641         m_iUriMode = uriModeForProtocol( m_strProtocol );
00642     // Proceed to correct parse function.
00643     switch ( m_iUriMode )
00644   {
00645     case RawURI:
00646         parseRawURI( _url );
00647         return;
00648     case Mailto:
00649         parseMailto( _url );
00650         return;
00651     case URL:
00652         parseURL( _url, encoding_hint );
00653         return;
00654     default:
00655         // Unknown URI mode results in an invalid URI.
00656         break;
00657     }
00658     }
00659 
00660 NodeErr:
00661     reset();
00662     m_strProtocol = _url;
00663     m_iUriMode = Invalid;
00664 }
00665 
00666 void KURL::parseRawURI( const QString& _url, int encoding_hint )
00667 {
00668     uint len = _url.length();
00669     const QChar* buf = _url.unicode();
00670 
00671     uint pos = 0;
00672 
00673     // Accept any amount of (alpha|digit|'+'|'-')
00674     // '.' is not currently accepted, because current KURL may be confused.
00675     // Proceed with :
00676     while( pos < len && (isalpha((int)buf[pos]) || isdigit((int)buf[pos]) ||
00677              buf[pos] == '+' || buf[pos] == '-')) pos++;
00678 
00679     // Note that m_strProtocol is already set here, so we just skip over the protocol.
00680     if (pos < len && buf[pos] == ':' )
00681     pos++;
00682     else {
00683     reset();
00684     m_strProtocol = _url;
00685     m_iUriMode = Invalid;
00686     return;
00687     }
00688 
00689     if ( pos == len )
00690     m_strPath = QString::null;
00691     else
00692     m_strPath = decode( QString( buf + pos, len - pos ), encoding_hint );
00693 
00694     m_bIsMalformed = false;
00695 
00696     return;
00697 }
00698 
00699 void KURL::parseMailto( const QString& _url, int encoding_hint )
00700 {
00701     parseURL( _url, encoding_hint);
00702     if ( m_bIsMalformed )
00703     return;
00704     QRegExp mailre("(.+@)(.+)");
00705     if ( mailre.exactMatch( m_strPath ) )
00706     {
00707 #ifndef KDE_QT_ONLY
00708     QString host = KIDNA::toUnicode( mailre.cap( 2 ) );
00709     if (host.isEmpty())
00710         host = mailre.cap( 2 ).lower();
00711 #else
00712     QString host = mailre.cap( 2 ).lower();
00713 #endif
00714     m_strPath = mailre.cap( 1 ) + host;
00715   }
00716 }
00717 
// Parses '_url' as a hierarchical URL of the general shape
//     protocol://user:pass@host:port/path?query#ref
// using a goto-driven state machine (the "Node N" labels and comments).
//
//   _url           text to parse
//   encoding_hint  forwarded to decode(), setEncodedPath() and _setQuery()
//
// The protocol characters are only skipped; m_strProtocol is expected to
// have been set by the caller. On success the user, password, host, port,
// path, query and reference members are filled in as far as they are
// present, m_bIsMalformed is cleared, and an empty protocol is replaced by
// "file" in URL mode. On failure the object is reset, '_url' is stored in
// m_strProtocol and the mode becomes Invalid.
00718 void KURL::parseURL( const QString& _url, int encoding_hint )
00719 {
00720   QString port;
00721   bool badHostName = false;
00722   int start = 0;
00723   uint len = _url.length();
00724   const QChar* buf = _url.unicode();
00725 
00726   QChar delim;
00727   QString tmp;
00728 
00729   uint pos = 0;
00730 
00731   // Node 1: Accept alpha or slash
00732   QChar x = buf[pos++];
00733   if ( x == '/' )
00734     goto Node9;
00735   if ( !isalpha( (int)x ) )
00736     goto NodeErr;
00737 
00738   // Node 2: Accept any amount of (alpha|digit|'+'|'-')
00739   // '.' is not currently accepted, because current KURL may be confused.
00740   // Proceed with :// :/ or :
00741   while( pos < len && (isalpha((int)buf[pos]) || isdigit((int)buf[pos]) ||
00742           buf[pos] == '+' || buf[pos] == '-')) pos++;
00743 
00744   // Note that m_strProtocol is already set here, so we just skip over the protocol.
// "protocol://" continues with the authority part (Node 3);
// "protocol:" followed by anything else goes straight to the path (Node 9).
00745   if ( pos+2 < len && buf[pos] == ':' && buf[pos+1] == '/' && buf[pos+2] == '/' )
00746     {
00747       pos += 3;
00748     }
00749   else if (pos+1 < len && buf[pos] == ':' ) // Need to always compare length()-1 otherwise KURL passes "http:" as legal!!
00750     {
00751       pos++;
00752       start = pos;
00753       goto Node9;
00754     }
00755   else
00756     goto NodeErr;
00757 
00758   //Node 3: We need at least one character here
00759   if ( pos == len )
00760       goto NodeErr;
00761   start = pos;
00762 
00763   // Node 4: Accept any amount of characters.
00764   if (buf[pos] == '[')     // An IPv6 host follows.
00765       goto Node8;
00766   // Terminate on / or @ or ? or # or " or ; or <
// At this point it is not yet known whether the text being scanned is a
// user name or a host name; badHostName only matters if it turns out to be
// a host.
00767   x = buf[pos];
00768   while( (x != ':') && (x != '@') && (x != '/') && (x != '?') && (x != '#') )
00769   {
00770      if ((x == '\"') || (x == ';') || (x == '<'))
00771         badHostName = true;
00772      if (++pos == len)
00773         break;
00774      x = buf[pos];
00775   }
00776   if ( pos == len )
00777     {
00778       if (badHostName)
00779          goto NodeErr;
00780 
00781       setHost(decode(QString( buf + start, pos - start ), encoding_hint));
00782       goto NodeOk;
00783     }
00784   if ( x == '@' )
00785     {
00786       m_strUser = decode(QString( buf + start, pos - start ), encoding_hint);
00787       pos++;
00788       goto Node7;
00789     }
00790   else if ( (x == '/') || (x == '?') || (x == '#'))
00791     {
00792       if (badHostName)
00793          goto NodeErr;
00794 
00795       setHost(decode(QString( buf + start, pos - start ), encoding_hint));
00796       start = pos;
00797       goto Node9;
00798     }
00799   else if ( x != ':' )
00800     goto NodeErr;
// Stopped at ':' — tentatively store the text as the user name; Node 6
// decides whether it was actually a host followed by a port.
00801   m_strUser = decode(QString( buf + start, pos - start ), encoding_hint);
00802   pos++;
00803 
00804   // Node 5: We need at least one character
00805   if ( pos == len )
00806     goto NodeErr;
00807   start = pos++;
00808 
00809   // Node 6: Read everything until @, /, ? or #
00810   while( (pos < len) &&
00811         (buf[pos] != '@') &&
00812         (buf[pos] != '/') &&
00813         (buf[pos] != '?') &&
00814         (buf[pos] != '#')) pos++;
00815   // If we now have a '@' the ':' separates user and password.
00816   // Otherwise it separates host and port.
00817   if ( (pos == len) || (buf[pos] != '@') )
00818     {
00819       // Ok the : was used to separate host and port
00820       if (badHostName)
00821          goto NodeErr;
00822       setHost(m_strUser);
00823       m_strUser = QString::null;
00824       QString tmp( buf + start, pos - start );
00825       char *endptr;
00826       m_iPort = (unsigned short int)strtol(tmp.ascii(), &endptr, 10);
00827       if ((pos == len) && (strlen(endptr) == 0))
00828         goto NodeOk;
00829       // there is more after the digits
// Move pos back to the first character that strtol() did not consume.
00830       pos -= strlen(endptr);
00831       if ((buf[pos] != '@') &&
00832           (buf[pos] != '/') &&
00833           (buf[pos] != '?') &&
00834           (buf[pos] != '#'))
00835         goto NodeErr;
00836 
00837       start = pos++;
00838       goto Node9;
00839     }
00840   m_strPass = decode(QString( buf + start, pos - start), encoding_hint);
00841   pos++;
00842 
00843   // Node 7: We need at least one character
00844  Node7:
00845   if ( pos == len )
00846     goto NodeErr;
00847 
// Node 8: host name, either a bracketed IPv6 literal or a plain host.
00848  Node8:
00849   if (buf[pos] == '[')
00850   {
00851     // IPv6 address
00852     start = ++pos; // Skip '['
00853 
00854     if (pos == len)
00855     {
00856        badHostName = true;
00857        goto NodeErr;
00858     }
00859     // Node 8b: Read everything until ] or terminate
00860     badHostName = false;
00861     x = buf[pos];
00862     while( (x != ']') )
00863     {
00864        if ((x == '\"') || (x == ';') || (x == '<'))
00865           badHostName = true;
00866        if (++pos == len)
00867        {
// Reached the end without a closing ']'.
00868           badHostName = true;
00869           break;
00870        }
00871        x = buf[pos];
00872     }
00873     if (badHostName)
00874        goto NodeErr;
00875     setHost(decode(QString( buf + start, pos - start ), encoding_hint));
00876     if (pos < len) pos++; // Skip ']'
00877     if (pos == len)
00878        goto NodeOk;
00879   }
00880   else
00881   {
00882     // Non IPv6 address
00883     start = pos;
00884 
00885     // Node 8b: Read everything until / : or terminate
00886     badHostName = false;
00887     x = buf[pos];
00888     while( (x != ':') && (x != '@') && (x != '/') && (x != '?') && (x != '#') )
00889     {
00890        if ((x == '\"') || (x == ';') || (x == '<'))
00891           badHostName = true;
00892        if (++pos == len)
00893           break;
00894        x = buf[pos];
00895     }
00896     if (badHostName)
00897        goto NodeErr;
00898     if ( pos == len )
00899     {
00900        setHost(decode(QString( buf + start, pos - start ), encoding_hint));
00901        goto NodeOk;
00902     }
00903     setHost(decode(QString( buf + start, pos - start ), encoding_hint));
00904   }
// After the host only '/' (path) or ':' (port) is accepted here.
00905   x = buf[pos];
00906   if ( x == '/' )
00907     {
00908       start = pos++;
00909       goto Node9;
00910     }
00911   else if ( x != ':' )
00912     goto NodeErr;
00913   pos++;
00914 
00915   // Node 8a: Accept at least one digit
00916   if ( pos == len )
00917     goto NodeErr;
00918   start = pos;
00919   if ( !isdigit( buf[pos++] ) )
00920     goto NodeErr;
00921 
00922   // Node 8b: Accept any amount of digits
00923   while( pos < len && isdigit( buf[pos] ) ) pos++;
00924   port = QString( buf + start, pos - start );
00925   m_iPort = port.toUShort();
00926   if ( pos == len )
00927     goto NodeOk;
00928   start = pos++;
00929 
00930  Node9: // parse path until query or reference reached
00931 
00932   while( pos < len && buf[pos] != '#' && buf[pos]!='?' ) pos++;
00933 
00934   tmp = QString( buf + start, pos - start );
00935   //kdDebug(126)<<" setting encoded path&query to:"<<tmp<<endl;
00936   setEncodedPath( tmp, encoding_hint );
00937 
00938   if ( pos == len )
00939       goto NodeOk;
00940 
00941  //Node10: // parse query or reference depending on what comes first
// delim is the OTHER delimiter, i.e. the one that would end the part that
// starts here: after '#' look for '?', after '?' look for '#'.
00942   delim = (buf[pos++]=='#'?'?':'#');
00943 
00944   start = pos;
00945 
00946   while(pos < len && buf[pos]!=delim ) pos++;
00947 
00948   tmp = QString(buf + start, pos - start);
// delim == '#' means the part just read started with '?', so it is the query.
00949   if (delim=='#')
00950       _setQuery(tmp, encoding_hint);
00951   else
00952       m_strRef_encoded = tmp;
00953 
00954   if (pos == len)
00955       goto NodeOk;
00956 
00957  //Node11: // feed the rest into the remaining variable
00958   tmp = QString( buf + pos + 1, len - pos - 1);
00959   if (delim == '#')
00960       m_strRef_encoded = tmp;
00961   else
00962       _setQuery(tmp, encoding_hint);
00963 
00964  NodeOk:
00965   //kdDebug(126)<<"parsing finished. m_strProtocol="<<m_strProtocol<<" m_strHost="<<m_strHost<<" m_strPath="<<m_strPath<<endl;
00966   m_bIsMalformed = false; // Valid URL
00967 
00968   //kdDebug()<<"Prot="<<m_strProtocol<<"\nUser="<<m_strUser<<"\nPass="<<m_strPass<<"\nHost="<<m_strHost<<"\nPath="<<m_strPath<<"\nQuery="<<m_strQuery_encoded<<"\nRef="<<m_strRef_encoded<<"\nPort="<<m_iPort<<endl;
00969   if (m_strProtocol.isEmpty())
00970   {
00971     m_iUriMode = URL;
00972     m_strProtocol = fileProt;
00973   }
00974   return;
00975 
00976  NodeErr:
00977 //  kdDebug(126) << "KURL couldn't parse URL \"" << _url << "\"" << endl;
// The unparsable text is kept in m_strProtocol.
00978   reset();
00979   m_strProtocol = _url;
00980   m_iUriMode = Invalid;
00981 }
00982 
00983 KURL& KURL::operator=( const QString& _url )
00984 {
00985   reset();
00986   parse( _url );
00987 
00988   return *this;
00989 }
00990 
00991 KURL& KURL::operator=( const char * _url )
00992 {
00993   reset();
00994   parse( QString::fromLatin1(_url) );
00995 
00996   return *this;
00997 }
00998 
00999 #ifndef QT_NO_NETWORKPROTOCOL
01000 KURL& KURL::operator=( const QUrl & u )
01001 {
01002   m_strProtocol = u.protocol();
01003   m_iUriMode = Auto;
01004   m_strUser = u.user();
01005   m_strPass = u.password();
01006   m_strHost = u.host();
01007   m_strPath = u.path( false );
01008   m_strPath_encoded = QString::null;
01009   m_strQuery_encoded = u.query();
01010   m_strRef_encoded = u.ref();
01011   m_bIsMalformed = !u.isValid();
01012   m_iPort = u.port();
01013 
01014   return *this;
01015 }
01016 #endif
01017 
01018 KURL& KURL::operator=( const KURL& _u )
01019 {
01020   m_strProtocol = _u.m_strProtocol;
01021   m_strUser = _u.m_strUser;
01022   m_strPass = _u.m_strPass;
01023   m_strHost = _u.m_strHost;
01024   m_strPath = _u.m_strPath;
01025   m_strPath_encoded = _u.m_strPath_encoded;
01026   m_strQuery_encoded = _u.m_strQuery_encoded;
01027   m_strRef_encoded = _u.m_strRef_encoded;
01028   m_bIsMalformed = _u.m_bIsMalformed;
01029   m_iPort = _u.m_iPort;
01030   m_iUriMode = _u.m_iUriMode;
01031 
01032   return *this;
01033 }
01034 
01035 bool KURL::operator<( const KURL& _u) const
01036 {
01037   int i;
01038   if (!_u.isValid())
01039   {
01040      if (!isValid())
01041      {
01042         i = m_strProtocol.compare(_u.m_strProtocol);
01043         return (i < 0);
01044      }
01045      return false;
01046   }
01047   if (!isValid())
01048      return true;
01049   
01050   i = m_strProtocol.compare(_u.m_strProtocol);
01051   if (i) return (i < 0);
01052 
01053   i = m_strHost.compare(_u.m_strHost);
01054   if (i) return (i < 0);
01055 
01056   if (m_iPort != _u.m_iPort) return (m_iPort < _u.m_iPort);
01057 
01058   i = m_strPath.compare(_u.m_strPath);
01059   if (i) return (i < 0);
01060 
01061   i = m_strQuery_encoded.compare(_u.m_strQuery_encoded);
01062   if (i) return (i < 0);
01063 
01064   i = m_strRef_encoded.compare(_u.m_strRef_encoded);
01065   if (i) return (i < 0);
01066 
01067   i = m_strUser.compare(_u.m_strUser);
01068   if (i) return (i < 0);
01069   
01070   i = m_strPass.compare(_u.m_strPass);
01071   if (i) return (i < 0);
01072 
01073   return false;    
01074 }
01075 
01076 bool KURL::operator==( const KURL& _u ) const
01077 {
01078   if ( !isValid() || !_u.isValid() )
01079     return false;
01080 
01081   if ( m_strProtocol == _u.m_strProtocol &&
01082        m_strUser == _u.m_strUser &&
01083        m_strPass == _u.m_strPass &&
01084        m_strHost == _u.m_strHost &&
01085        m_strPath == _u.m_strPath &&
01086        // The encoded path may be null, but the URLs are still equal (David)
01087        ( m_strPath_encoded.isNull() || _u.m_strPath_encoded.isNull() ||
01088          m_strPath_encoded == _u.m_strPath_encoded ) &&
01089        m_strQuery_encoded == _u.m_strQuery_encoded &&
01090        m_strRef_encoded == _u.m_strRef_encoded &&
01091        m_iPort == _u.m_iPort )
01092   {
01093     return true;
01094   }
01095 
01096   return false;
01097 }
01098 
01099 bool KURL::operator==( const QString& _u ) const
01100 {
01101   KURL u( _u );
01102   return ( *this == u );
01103 }
01104 
01105 bool KURL::cmp( const KURL &u, bool ignore_trailing ) const
01106 {
01107   return equals( u, ignore_trailing );
01108 }
01109 
01110 bool KURL::equals( const KURL &_u, bool ignore_trailing ) const
01111 {
01112   if ( !isValid() || !_u.isValid() )
01113     return false;
01114 
01115   if ( ignore_trailing )
01116   {
01117     QString path1 = path(1);
01118     QString path2 = _u.path(1);
01119     if ( path1 != path2 )
01120       return false;
01121 
01122     if ( m_strProtocol == _u.m_strProtocol &&
01123          m_strUser == _u.m_strUser &&
01124          m_strPass == _u.m_strPass &&
01125          m_strHost == _u.m_strHost &&
01126          m_strQuery_encoded == _u.m_strQuery_encoded &&
01127          m_strRef_encoded == _u.m_strRef_encoded &&
01128          m_iPort == _u.m_iPort )
01129       return true;
01130 
01131     return false;
01132   }
01133 
01134   return ( *this == _u );
01135 }
01136 
01137 bool KURL::isParentOf( const KURL& _u ) const
01138 {
01139   if ( !isValid() || !_u.isValid() )
01140     return false;
01141 
01142   if ( m_strProtocol == _u.m_strProtocol &&
01143        m_strUser == _u.m_strUser &&
01144        m_strPass == _u.m_strPass &&
01145        m_strHost == _u.m_strHost &&
01146        m_strQuery_encoded == _u.m_strQuery_encoded &&
01147        m_strRef_encoded == _u.m_strRef_encoded &&
01148        m_iPort == _u.m_iPort )
01149   {
01150     if ( path().isEmpty() || _u.path().isEmpty() )
01151         return false; // can't work with implicit paths
01152 
01153     QString p1( cleanpath( path(), true, false ) );
01154     if ( p1[p1.length()-1] != '/' )
01155         p1 += '/';
01156     QString p2( cleanpath( _u.path(), true, false ) );
01157     if ( p2[p2.length()-1] != '/' )
01158         p2 += '/';
01159 
01160     //kdDebug(126) << "p1=" << p1 << endl;
01161     //kdDebug(126) << "p2=" << p2 << endl;
01162     //kdDebug(126) << "p1.length()=" << p1.length() << endl;
01163     //kdDebug(126) << "p2.left(!$)=" << p2.left( p1.length() ) << endl;
01164     return p2.startsWith( p1 );
01165   }
01166   return false;
01167 }
01168 
01169 void KURL::setFileName( const QString& _txt )
01170 {
01171   m_strRef_encoded = QString::null;
01172   int i = 0;
01173   while( _txt[i] == '/' ) ++i;
01174   QString tmp;
01175   if ( i )
01176     tmp = _txt.mid( i );
01177   else
01178     tmp = _txt;
01179 
01180   QString path = m_strPath_encoded.isEmpty() ? m_strPath : m_strPath_encoded;
01181   if ( path.isEmpty() )
01182     path = "/";
01183   else
01184   {
01185     int lastSlash = path.findRev( '/' );
01186     if ( lastSlash == -1)
01187     {
01188       // The first character is not a '/' ???
01189       // This looks strange ...
01190       path = "/";
01191     }
01192     else if ( path.right(1) != "/" )
01193       path.truncate( lastSlash+1 ); // keep the "/"
01194   }
01195   if (m_strPath_encoded.isEmpty())
01196   {
01197      path += tmp;
01198      setPath( path );
01199   }
01200   else
01201   {
01202      path += encode_string(tmp);
01203      setEncodedPath( path );
01204   }
01205   cleanPath();
01206 }
01207 
01208 void KURL::cleanPath( bool cleanDirSeparator ) // taken from the old KURL
01209 {
01210   if (m_iUriMode != URL) return;
01211   m_strPath = cleanpath(m_strPath, cleanDirSeparator, false);
01212   // WABA: Is this safe when "/../" is encoded with %?
01213   m_strPath_encoded = cleanpath(m_strPath_encoded, cleanDirSeparator, true);
01214 }
01215 
01216 static QString trailingSlash( int _trailing, const QString &path )
01217 {
01218   QString result = path;
01219 
01220   if ( _trailing == 0 )
01221     return result;
01222   else if ( _trailing == 1 )
01223   {
01224     int len = result.length();
01225     if ( (len == 0) || (result[ len - 1 ] != '/') )
01226       result += "/";
01227     return result;
01228   }
01229   else if ( _trailing == -1 )
01230   {
01231     if ( result == "/" )
01232       return result;
01233     int len = result.length();
01234     if ( (len != 0) && (result[ len - 1 ] == '/') )
01235       result.truncate( len - 1 );
01236     return result;
01237   }
01238   else {
01239     assert( 0 );
01240     return QString::null;
01241   }
01242 }
01243 
01244 void KURL::adjustPath( int _trailing )
01245 {
01246   if (!m_strPath_encoded.isEmpty())
01247   {
01248      m_strPath_encoded = trailingSlash( _trailing, m_strPath_encoded );
01249   }
01250   m_strPath = trailingSlash( _trailing, m_strPath );
01251 }
01252 
01253 
01254 QString KURL::encodedPathAndQuery( int _trailing, bool _no_empty_path, int encoding_hint ) const
01255 {
01256   QString tmp;
01257   if (!m_strPath_encoded.isEmpty() && encoding_hint == 0)
01258   {
01259      tmp = trailingSlash( _trailing, m_strPath_encoded );
01260   }
01261   else
01262   {
01263      tmp = path( _trailing );
01264      if ( _no_empty_path && tmp.isEmpty() )
01265         tmp = "/";
01266      tmp = encode( tmp, false, encoding_hint );
01267   }
01268 
01269   // TODO apply encoding_hint to the query
01270   if (!m_strQuery_encoded.isNull())
01271       tmp += '?' + m_strQuery_encoded;
01272   return tmp;
01273 }
01274 
01275 void KURL::setEncodedPath( const QString& _txt, int encoding_hint )
01276 {
01277   m_strPath_encoded = _txt;
01278 
01279   decode( m_strPath_encoded, m_strPath, m_strPath_encoded, encoding_hint );
01280   // Throw away encoding for local files, makes file-operations faster.
01281   if (m_strProtocol == fileProt)
01282      m_strPath_encoded = QString::null;
01283 
01284   if ( m_iUriMode == Auto )
01285     m_iUriMode = URL;
01286 }
01287 
01288 
01289 void KURL::setEncodedPathAndQuery( const QString& _txt, int encoding_hint )
01290 {
01291   int pos = _txt.find( '?' );
01292   if ( pos == -1 )
01293   {
01294     setEncodedPath(_txt, encoding_hint);
01295     m_strQuery_encoded = QString::null;
01296   }
01297   else
01298   {
01299     setEncodedPath(_txt.left( pos ), encoding_hint);
01300     _setQuery(_txt.right(_txt.length() - pos - 1), encoding_hint);
01301   }
01302 }
01303 
01304 QString KURL::path( int _trailing ) const
01305 {
01306   return trailingSlash( _trailing, path() );
01307 }
01308 
01309 bool KURL::isLocalFile() const
01310 {
01311   if ( (m_strProtocol != fileProt ) || hasSubURL() )
01312      return false;
01313      
01314   if (m_strHost.isEmpty() || (m_strHost == "localhost"))
01315      return true;
01316      
01317   char hostname[ 256 ];
01318   hostname[ 0 ] = '\0';
01319   if (!gethostname( hostname, 255 ))
01320      hostname[sizeof(hostname)-1] = '\0';
01321      
01322   for(char *p = hostname; *p; p++)
01323      *p = tolower(*p);
01324      
01325   return (m_strHost == hostname);
01326 }
01327 
01328 void KURL::setFileEncoding(const QString &encoding)
01329 {
01330   if (!isLocalFile())
01331      return;
01332 
01333   QString q = query();
01334 
01335   if (!q.isEmpty() && (q[0] == '?'))
01336      q = q.mid(1);
01337 
01338   QStringList args = QStringList::split('&', q);
01339   for(QStringList::Iterator it = args.begin();
01340       it != args.end();)
01341   {
01342       QString s = decode_string(*it);
01343       if (s.startsWith("charset="))
01344          it = args.erase(it);
01345       else
01346          ++it;
01347   }
01348   if (!encoding.isEmpty())
01349      args.append("charset="+encode_string(encoding));
01350 
01351   if (args.isEmpty())
01352      _setQuery(QString::null);
01353   else
01354      _setQuery(args.join("&"));
01355 }
01356 
01357 QString KURL::fileEncoding() const
01358 {
01359   if (!isLocalFile())
01360      return QString::null;
01361 
01362   QString q = query();
01363 
01364   if (q.isEmpty())
01365      return QString::null;
01366 
01367   if (q[0] == '?')
01368      q = q.mid(1);
01369 
01370   QStringList args = QStringList::split('&', q);
01371   for(QStringList::ConstIterator it = args.begin();
01372       it != args.end();
01373       ++it)
01374   {
01375       QString s = decode_string(*it);
01376       if (s.startsWith("charset="))
01377          return s.mid(8);
01378   }
01379   return QString::null;
01380 }
01381 
01382 bool KURL::hasSubURL() const
01383 {
01384   if ( m_strProtocol.isEmpty() || m_bIsMalformed )
01385     return false;
01386   if (m_strRef_encoded.isEmpty())
01387      return false;
01388   if (m_strRef_encoded.startsWith("gzip:"))
01389      return true;
01390   if (m_strRef_encoded.startsWith("bzip:"))
01391      return true;
01392   if (m_strRef_encoded.startsWith("bzip2:"))
01393      return true;
01394   if (m_strRef_encoded.startsWith("tar:"))
01395      return true;
01396   if (m_strRef_encoded.startsWith("ar:"))
01397      return true;
01398   if (m_strRef_encoded.startsWith("zip:"))
01399      return true;
01400   if ( m_strProtocol == "error" ) // anything that starts with error: has suburls
01401      return true;
01402   return false;
01403 }
01404 
01405 QString KURL::url( int _trailing, int encoding_hint ) const
01406 {
01407   if( m_bIsMalformed )
01408   {
01409     // Return the whole url even when the url is
01410     // malformed.  Under such conditions the url
01411     // is stored in m_strProtocol.
01412     return m_strProtocol;
01413   }
01414 
01415   QString u = m_strProtocol;
01416   if (!u.isEmpty())
01417     u += ":";
01418 
01419   if ( hasHost() )
01420   {
01421     u += "//";
01422     if ( hasUser() )
01423     {
01424       u += encode(m_strUser, true, encoding_hint);
01425       if ( hasPass() )
01426       {
01427         u += ":";
01428         u += encode(m_strPass, true, encoding_hint);
01429       }
01430       u += "@";
01431     }
01432     if ( m_iUriMode == URL )
01433     {
01434       bool IPv6 = (m_strHost.find(':') != -1);
01435       if (IPv6)
01436         u += '[' + m_strHost + ']';
01437       else
01438         u += encodeHost(m_strHost, true, encoding_hint);
01439       if ( m_iPort != 0 ) {
01440         QString buffer;
01441         buffer.sprintf( ":%u", m_iPort );
01442         u += buffer;
01443       }
01444     }
01445     else
01446     {
01447       u += m_strHost;
01448     }
01449   }
01450 
01451   if ( m_iUriMode == URL || m_iUriMode == Mailto )
01452     u += encodedPathAndQuery( _trailing, false, encoding_hint );
01453   else
01454     u += m_strPath;
01455 
01456   if ( hasRef() )
01457   {
01458     u += "#";
01459     u += m_strRef_encoded;
01460   }
01461 
01462   return u;
01463 }
01464 
01465 QString KURL::prettyURL( int _trailing ) const
01466 {
01467   if( m_bIsMalformed )
01468   {
01469     // Return the whole url even when the url is
01470     // malformed.  Under such conditions the url
01471     // is stored in m_strProtocol.
01472     return m_strProtocol;
01473   }
01474 
01475   QString u = m_strProtocol;
01476   if (!u.isEmpty())
01477      u += ":";
01478 
01479   if ( hasHost() )
01480   {
01481     u += "//";
01482     if ( hasUser() )
01483     {
01484       u += lazy_encode(m_strUser);
01485       // Don't show password!
01486       u += "@";
01487     }
01488     if ( m_iUriMode == URL )
01489     {
01490     bool IPv6 = (m_strHost.find(':') != -1);
01491     if (IPv6)
01492     {
01493        u += '[' + m_strHost + ']';
01494     }
01495     else
01496     {
01497        u += lazy_encode(m_strHost);
01498     }
01499     }
01500     else
01501     {
01502       u += lazy_encode(m_strHost);
01503     }
01504     if ( m_iPort != 0 ) {
01505       QString buffer;
01506       buffer.sprintf( ":%u", m_iPort );
01507       u += buffer;
01508     }
01509   }
01510 
01511   u += trailingSlash( _trailing, lazy_encode( m_strPath ) );
01512   if (!m_strQuery_encoded.isNull())
01513       u += '?' + m_strQuery_encoded;
01514 
01515   if ( hasRef() )
01516   {
01517     u += "#";
01518     u += m_strRef_encoded;
01519   }
01520 
01521   return u;
01522 }
01523 
01524 QString KURL::prettyURL( int _trailing, AdjustementFlags _flags) const
01525 {
01526     QString u = prettyURL(_trailing);
01527     if (_flags & StripFileProtocol && u.startsWith("file:"))
01528         u.remove(0, 5);
01529     return u;
01530 }
01531 
01532 QString KURL::htmlURL() const
01533 {
01534   return QStyleSheet::escape(prettyURL());
01535 }
01536 
01537 KURL::List KURL::split( const KURL& _url )
01538 {
01539   QString ref;
01540   KURL::List lst;
01541   KURL url = _url;
01542 
01543   while(true)
01544   {
01545      KURL u = url;
01546      u.m_strRef_encoded = QString::null;
01547      lst.append(u);
01548      if (url.hasSubURL())
01549      {
01550         url = KURL(url.m_strRef_encoded);
01551      }
01552      else
01553      {
01554         ref = url.m_strRef_encoded;
01555         break;
01556      }
01557   }
01558 
01559   // Set HTML ref in all URLs.
01560   KURL::List::Iterator it;
01561   for( it = lst.begin() ; it != lst.end(); ++it )
01562   {
01563      (*it).m_strRef_encoded = ref;
01564   }
01565 
01566   return lst;
01567 }
01568 
01569 KURL::List KURL::split( const QString& _url )
01570 {
01571   return split(KURL(_url));
01572 }
01573 
01574 KURL KURL::join( const KURL::List & lst )
01575 {
01576   if (lst.isEmpty()) return KURL();
01577   KURL tmp;
01578 
01579   KURL::List::ConstIterator first = lst.fromLast();
01580   for( KURL::List::ConstIterator it = first; it != lst.end(); --it )
01581   {
01582      KURL u(*it);
01583      if (it != first)
01584      {
01585         if (!u.m_strRef_encoded) u.m_strRef_encoded = tmp.url();
01586         else u.m_strRef_encoded += "#" + tmp.url(); // Support more than one suburl thingy
01587      }
01588      tmp = u;
01589   }
01590 
01591   return tmp;
01592 }
01593 
01594 QString KURL::fileName( bool _strip_trailing_slash ) const
01595 {
01596   QString fname;
01597   if (hasSubURL()) { // If we have a suburl, then return the filename from there
01598     KURL::List list = KURL::split(*this);
01599     KURL::List::Iterator it = list.fromLast();
01600     return (*it).fileName(_strip_trailing_slash);
01601   }
01602   const QString &path = m_strPath;
01603 
01604   int len = path.length();
01605   if ( len == 0 )
01606     return fname;
01607 
01608   if ( _strip_trailing_slash )
01609   {
01610     while ( len >= 1 && path[ len - 1 ] == '/' )
01611       len--;
01612   }
01613   else if ( path[ len - 1 ] == '/' )
01614     return fname;
01615 
01616   // Does the path only consist of '/' characters ?
01617   if ( len == 1 && path[ 0 ] == '/' )
01618     return fname;
01619 
01620   // Skip last n slashes
01621   int n = 1;
01622   if (!m_strPath_encoded.isEmpty())
01623   {
01624      // This is hairy, we need the last unencoded slash.
01625      // Count in the encoded string how many encoded slashes follow the last
01626      // unencoded one.
01627      int i = m_strPath_encoded.findRev( '/', len - 1 );
01628      QString fileName_encoded = m_strPath_encoded.mid(i+1);
01629      n += fileName_encoded.contains("%2f", false);
01630   }
01631   int i = len;
01632   do {
01633     i = path.findRev( '/', i - 1 );
01634   }
01635   while (--n && (i > 0));
01636 
01637   // If ( i == -1 ) => the first character is not a '/'
01638   // So it's some URL like file:blah.tgz, return the whole path
01639   if ( i == -1 ) {
01640     if ( len == (int)path.length() )
01641       fname = path;
01642     else
01643       // Might get here if _strip_trailing_slash is true
01644       fname = path.left( len );
01645   }
01646   else
01647   {
01648      fname = path.mid( i + 1, len - i - 1 ); // TO CHECK
01649   }
01650   return fname;
01651 }
01652 
01653 void KURL::addPath( const QString& _txt )
01654 {
01655   if (hasSubURL())
01656   {
01657      KURL::List lst = split( *this );
01658      KURL &u = lst.last();
01659      u.addPath(_txt);
01660      *this = join( lst );
01661      return;
01662   }
01663   
01664   m_strPath_encoded = QString::null;
01665 
01666   if ( _txt.isEmpty() )
01667     return;
01668 
01669   int i = 0;
01670   int len = m_strPath.length();
01671   // NB: avoid three '/' when building a new path from nothing
01672   if ( len == 0 ) {
01673     while( _txt[i] == '/' ) ++i;
01674   }
01675   // Add the trailing '/' if it is missing
01676   else if ( _txt[0] != '/' && ( len == 0 || m_strPath[ len - 1 ] != '/' ) )
01677     m_strPath += "/";
01678 
01679   // No double '/' characters
01680   i = 0;
01681   if ( len != 0 && m_strPath[ len - 1 ] == '/' )
01682   {
01683     while( _txt[i] == '/' )
01684       ++i;
01685   }
01686 
01687   m_strPath += _txt.mid( i );
01688 }
01689 
01690 QString KURL::directory( bool _strip_trailing_slash_from_result,
01691                          bool _ignore_trailing_slash_in_path ) const
01692 {
01693   QString result = m_strPath_encoded.isEmpty() ? m_strPath : m_strPath_encoded;
01694   if ( _ignore_trailing_slash_in_path )
01695     result = trailingSlash( -1, result );
01696 
01697   if ( result.isEmpty() || result == "/" )
01698     return result;
01699 
01700   int i = result.findRev( "/" );
01701   // If ( i == -1 ) => the first character is not a '/'
01702   // So it's some URL like file:blah.tgz, with no path
01703   if ( i == -1 )
01704     return QString::null;
01705 
01706   if ( i == 0 )
01707   {
01708     result = "/";
01709     return result;
01710   }
01711 
01712   if ( _strip_trailing_slash_from_result )
01713     result = result.left( i );
01714   else
01715     result = result.left( i + 1 );
01716 
01717   if (!m_strPath_encoded.isEmpty())
01718     result = decode(result);
01719 
01720   return result;
01721 }
01722 
01723 
01724 bool KURL::cd( const QString& _dir )
01725 {
01726   if ( _dir.isEmpty() || m_bIsMalformed )
01727     return false;
01728 
01729   if (hasSubURL())
01730   {
01731      KURL::List lst = split( *this );
01732      KURL &u = lst.last();
01733      u.cd(_dir);
01734      *this = join( lst );
01735      return true;
01736   }
01737 
01738   // absolute path ?
01739   if ( _dir[0] == '/' )
01740   {
01741     m_strPath_encoded = QString::null;
01742     m_strPath = _dir;
01743     setHTMLRef( QString::null );
01744     m_strQuery_encoded = QString::null;
01745     return true;
01746   }
01747 
01748   // Users home directory on the local disk ?
01749   if ( ( _dir[0] == '~' ) && ( m_strProtocol == fileProt ))
01750   {
01751     m_strPath_encoded = QString::null;
01752     m_strPath = QDir::homeDirPath();
01753     m_strPath += "/";
01754     m_strPath += _dir.right(m_strPath.length() - 1);
01755     setHTMLRef( QString::null );
01756     m_strQuery_encoded = QString::null;
01757     return true;
01758   }
01759 
01760   // relative path
01761   // we always work on the past of the first url.
01762   // Sub URLs are not touched.
01763 
01764   // append '/' if necessary
01765   QString p = path(1);
01766   p += _dir;
01767   p = cleanpath( p, true, false );
01768   setPath( p );
01769 
01770   setHTMLRef( QString::null );
01771   m_strQuery_encoded = QString::null;
01772 
01773   return true;
01774 }
01775 
01776 KURL KURL::upURL( ) const
01777 {
01778   if (!query().isEmpty())
01779   {
01780      KURL u(*this);
01781      u._setQuery(QString::null);
01782      return u;
01783   };
01784 
01785   if (!hasSubURL())
01786   {
01787      KURL u(*this);
01788      u.cd("../");
01789      return u;
01790   }
01791 
01792   // We have a subURL.
01793   KURL::List lst = split( *this );
01794   if (lst.isEmpty())
01795       return KURL(); // Huh?
01796   while (true)
01797   {
01798      KURL &u = lst.last();
01799      QString old = u.path();
01800      u.cd("../");
01801      if (u.path() != old)
01802          break; // Finshed.
01803      if (lst.count() == 1)
01804          break; // Finished.
01805      lst.remove(lst.fromLast());
01806   }
01807   return join( lst );
01808 }
01809 
01810 QString KURL::htmlRef() const
01811 {
01812   if ( !hasSubURL() )
01813   {
01814     return decode( ref() );
01815   }
01816 
01817   List lst = split( *this );
01818   return decode( (*lst.begin()).ref() );
01819 }
01820 
01821 QString KURL::encodedHtmlRef() const
01822 {
01823   if ( !hasSubURL() )
01824   {
01825     return ref();
01826   }
01827 
01828   List lst = split( *this );
01829   return (*lst.begin()).ref();
01830 }
01831 
01832 void KURL::setHTMLRef( const QString& _ref )
01833 {
01834   if ( !hasSubURL() )
01835   {
01836     m_strRef_encoded = encode( _ref, true, 0 /*?*/);
01837     return;
01838   }
01839 
01840   List lst = split( *this );
01841 
01842   (*lst.begin()).setRef( encode( _ref, true, 0 /*?*/) );
01843 
01844   *this = join( lst );
01845 }
01846 
01847 bool KURL::hasHTMLRef() const
01848 {
01849   if ( !hasSubURL() )
01850   {
01851     return hasRef();
01852   }
01853 
01854   List lst = split( *this );
01855   return (*lst.begin()).hasRef();
01856 }
01857 
01858 void
01859 KURL::setProtocol( const QString& _txt )
01860 {
01861    m_strProtocol = _txt;
01862    if ( m_iUriMode == Auto ) m_iUriMode = uriModeForProtocol( m_strProtocol );
01863    m_bIsMalformed = false;
01864 }
01865 
01866 void
01867 KURL::setUser( const QString& _txt )
01868 {
01869    m_strUser = _txt;
01870 }
01871 
01872 void
01873 KURL::setPass( const QString& _txt )
01874 {
01875    m_strPass = _txt;
01876 }
01877 
01878 void
01879 KURL::setHost( const QString& _txt )
01880 {
01881   if ( m_iUriMode == Auto )
01882     m_iUriMode = URL;
01883   switch ( m_iUriMode )
01884   {
01885   case URL:
01886 #ifndef KDE_QT_ONLY
01887    m_strHost = KIDNA::toUnicode(_txt);
01888    if (m_strHost.isEmpty())
01889       m_strHost = _txt.lower(); // Probably an invalid hostname, but...
01890 #else
01891    m_strHost = _txt.lower();
01892 #endif
01893     break;
01894   default:
01895     m_strHost = _txt;
01896     break;
01897   }
01898 }
01899 
01900 void
01901 KURL::setPort( unsigned short int _p )
01902 {
01903    m_iPort = _p;
01904 }
01905 
01906 void KURL::setPath( const QString & path )
01907 {
01908   if (isEmpty())
01909     m_bIsMalformed = false;
01910   if (m_strProtocol.isEmpty())
01911   {
01912     m_strProtocol = fileProt;
01913   }
01914   m_strPath = path;
01915   m_strPath_encoded = QString::null;
01916   if ( m_iUriMode == Auto )
01917     m_iUriMode = URL;
01918 }
01919 
01920 void KURL::setDirectory( const QString &dir)
01921 {
01922   if ( dir.endsWith("/"))
01923      setPath(dir);
01924   else
01925      setPath(dir+"/");
01926 }
01927 
01928 void KURL::setQuery( const QString &_txt, int encoding_hint)
01929 {
01930    if (_txt[0] == '?')
01931       _setQuery( _txt.mid(1), encoding_hint );
01932    else
01933       _setQuery( _txt, encoding_hint );
01934 }
01935 
01936 // This is a private function that expects a query without '?'
01937 void KURL::_setQuery( const QString &_txt, int encoding_hint)
01938 {
01939    m_strQuery_encoded = _txt;
01940    if (!_txt.length())
01941       return;
01942 
01943    int l = m_strQuery_encoded.length();
01944    int i = 0;
01945    QString result;
01946    while (i < l)
01947    {
01948       int s = i;
01949       // Re-encode. Break encoded string up according to the reserved
01950       // characters '&:;=/?' and re-encode part by part.
01951       while(i < l)
01952       {
01953          char c = m_strQuery_encoded[i].latin1();
01954          if ((c == '&') || (c == ':') || (c == ';') ||
01955              (c == '=') || (c == '/') || (c == '?'))
01956             break;
01957          i++;
01958       }
01959       if (i > s)
01960       {
01961          QString tmp = m_strQuery_encoded.mid(s, i-s);
01962          QString newTmp;
01963          decode( tmp, newTmp, tmp, encoding_hint, false );
01964          result += tmp;
01965       }
01966       if (i < l)
01967       {
01968          result += m_strQuery_encoded[i];
01969          i++;
01970       }
01971    }
01972    m_strQuery_encoded = result;
01973 }
01974 
01975 QString KURL::query() const
01976 {
01977     if (m_strQuery_encoded.isNull())
01978         return QString::null;
01979     return '?'+m_strQuery_encoded;
01980 }
01981 
01982 QString KURL::decode_string(const QString &str, int encoding_hint)
01983 {
01984    return decode(str, encoding_hint);
01985 }
01986 
01987 QString KURL::encode_string(const QString &str, int encoding_hint)
01988 {
01989    return encode(str, false, encoding_hint);
01990 }
01991 
01992 QString KURL::encode_string_no_slash(const QString &str, int encoding_hint)
01993 {
01994    return encode(str, true, encoding_hint);
01995 }
01996 
01997 bool urlcmp( const QString& _url1, const QString& _url2 )
01998 {
01999   // Both empty ?
02000   if ( _url1.isEmpty() && _url2.isEmpty() )
02001     return true;
02002   // Only one empty ?
02003   if ( _url1.isEmpty() || _url2.isEmpty() )
02004     return false;
02005 
02006   KURL::List list1 = KURL::split( _url1 );
02007   KURL::List list2 = KURL::split( _url2 );
02008 
02009   // Malformed ?
02010   if ( list1.isEmpty() || list2.isEmpty() )
02011     return false;
02012 
02013   return ( list1 == list2 );
02014 }
02015 
02016 bool urlcmp( const QString& _url1, const QString& _url2, bool _ignore_trailing, bool _ignore_ref )
02017 {
02018   // Both empty ?
02019   if ( _url1.isEmpty() && _url2.isEmpty() )
02020     return true;
02021   // Only one empty ?
02022   if ( _url1.isEmpty() || _url2.isEmpty() )
02023     return false;
02024 
02025   KURL::List list1 = KURL::split( _url1 );
02026   KURL::List list2 = KURL::split( _url2 );
02027 
02028   // Malformed ?
02029   if ( list1.isEmpty() || list2.isEmpty() )
02030     return false;
02031 
02032   unsigned int size = list1.count();
02033   if ( list2.count() != size )
02034     return false;
02035 
02036   if ( _ignore_ref )
02037   {
02038     (*list1.begin()).setRef(QString::null);
02039     (*list2.begin()).setRef(QString::null);
02040   }
02041 
02042   KURL::List::Iterator it1 = list1.begin();
02043   KURL::List::Iterator it2 = list2.begin();
02044   for( ; it1 != list1.end() ; ++it1, ++it2 )
02045     if ( !(*it1).equals( *it2, _ignore_trailing ) )
02046       return false;
02047 
02048   return true;
02049 }
02050 
02051 QMap< QString, QString > KURL::queryItems( int options ) const {
02052   return queryItems(options, 0);
02053 }
02054 
02055 QMap< QString, QString > KURL::queryItems( int options, int encoding_hint ) const {
02056   if ( m_strQuery_encoded.isEmpty() )
02057     return QMap<QString,QString>();
02058 
02059   QMap< QString, QString > result;
02060   QStringList items = QStringList::split( '&', m_strQuery_encoded );
02061   for ( QStringList::const_iterator it = items.begin() ; it != items.end() ; ++it ) {
02062     int equal_pos = (*it).find( '=' );
02063     if ( equal_pos > 0 ) { // = is not the first char...
02064       QString name = (*it).left( equal_pos );
02065       if ( options & CaseInsensitiveKeys )
02066     name = name.lower();
02067       QString value = (*it).mid( equal_pos + 1 );
02068       if ( value.isEmpty() )
02069     result.insert( name, QString::fromLatin1("") );
02070       else {
02071     // ### why is decoding name not necessary?
02072     value.replace( '+', ' ' ); // + in queries means space
02073     result.insert( name, decode_string( value, encoding_hint ) );
02074       }
02075     } else if ( equal_pos < 0 ) { // no =
02076       QString name = (*it);
02077       if ( options & CaseInsensitiveKeys )
02078     name = name.lower();
02079       result.insert( name, QString::null );
02080     }
02081   }
02082 
02083   return result;
02084 }
02085 
02086 QString KURL::queryItem( const QString& _item ) const
02087 {
02088   return queryItem( _item, 0 );
02089 }
02090 
02091 QString KURL::queryItem( const QString& _item, int encoding_hint ) const
02092 {
02093   QString item = _item + '=';
02094   if ( m_strQuery_encoded.length() <= 1 )
02095     return QString::null;
02096 
02097   QStringList items = QStringList::split( '&', m_strQuery_encoded );
02098   unsigned int _len = item.length();
02099   for ( QStringList::ConstIterator it = items.begin(); it != items.end(); ++it )
02100   {
02101     if ( (*it).startsWith( item ) )
02102     {
02103       if ( (*it).length() > _len )
02104       {
02105         QString str = (*it).mid( _len );
02106         str.replace( '+', ' ' ); // + in queries means space.
02107         return decode_string( str, encoding_hint );
02108       }
02109       else // empty value
02110         return QString::fromLatin1("");
02111     }
02112   }
02113 
02114   return QString::null;
02115 }
02116 
02117 void KURL::removeQueryItem( const QString& _item )
02118 {
02119   QString item = _item + '=';
02120   if ( m_strQuery_encoded.length() <= 1 )
02121     return;
02122 
02123   QStringList items = QStringList::split( '&', m_strQuery_encoded );
02124   for ( QStringList::Iterator it = items.begin(); it != items.end(); )
02125   {
02126     if ( (*it).startsWith( item ) || (*it == _item) )
02127     {
02128       QStringList::Iterator deleteIt = it;
02129       ++it;
02130       items.remove(deleteIt);
02131     }
02132     else
02133     {
02134        ++it;
02135     }
02136   }
02137   m_strQuery_encoded = items.join( "&" );
02138 }
02139 
02140 void KURL::addQueryItem( const QString& _item, const QString& _value, int encoding_hint )
02141 {
02142   QString item = _item + '=';
02143   QString value = encode( _value, true, encoding_hint );
02144 
02145   if (!m_strQuery_encoded.isEmpty())
02146      m_strQuery_encoded += '&';
02147   m_strQuery_encoded += item + value;
02148 }
02149 
02150 // static
02151 KURL KURL::fromPathOrURL( const QString& text )
02152 {
02153     if ( text.isEmpty() )
02154         return KURL();
02155     
02156     KURL url;
02157     if ( text[0] == '/' )
02158         url.setPath( text );
02159     else
02160         url = text;
02161 
02162     return url;
02163 }
02164 
02165 static QString _relativePath(const QString &base_dir, const QString &path, bool &isParent)
02166 {
02167    QString _base_dir(QDir::cleanDirPath(base_dir));
02168    QString _path(QDir::cleanDirPath(path.isEmpty() || (path[0] != '/') ? _base_dir+"/"+path : path));
02169 
02170    if (_base_dir.isEmpty())
02171       return _path;
02172 
02173    if (_base_dir[_base_dir.length()-1] != '/')
02174       _base_dir.append('/');
02175 
02176    QStringList list1 = QStringList::split('/', _base_dir);
02177    QStringList list2 = QStringList::split('/', _path);
02178                                                                    
02179    // Find where they meet
02180    uint level = 0;
02181    uint maxLevel = QMIN(list1.count(), list2.count());
02182    while((level < maxLevel) && (list1[level] == list2[level])) level++;
02183   
02184    QString result;                                                                       
02185    // Need to go down out of the first path to the common branch.
02186    for(uint i = level; i < list1.count(); i++)
02187       result.append("../");
02188 
02189    // Now up up from the common branch to the second path.
02190    for(uint i = level; i < list2.count(); i++)
02191       result.append(list2[i]).append("/");
02192 
02193    if ((level < list2.count()) && (path[path.length()-1] != '/'))
02194       result.truncate(result.length()-1);
02195 
02196    isParent = (level == list1.count());
02197 
02198    return result;
02199 }
02200 
02201 QString KURL::relativePath(const QString &base_dir, const QString &path, bool *isParent)
02202 {
02203    bool parent;
02204    QString result = _relativePath(base_dir, path, parent);
02205    if (parent)
02206       result.prepend("./");
02207       
02208    if (isParent)
02209       *isParent = parent;
02210    
02211    return result;
02212 }
02213 
02214 
02215 QString KURL::relativeURL(const KURL &base_url, const KURL &url, int encoding_hint)
02216 {
02217    if ((url.protocol() != base_url.protocol()) ||
02218        (url.host() != base_url.host()) ||
02219        (url.port() && url.port() != base_url.port()) ||
02220        (url.hasUser() && url.user() != base_url.user()) ||
02221        (url.hasPass() && url.pass() != base_url.pass()))
02222    {
02223       return url.url(0, encoding_hint);
02224    }
02225 
02226    QString relURL;
02227    
02228    if ((base_url.path() != url.path()) || (base_url.query() != url.query()))
02229    {
02230       bool dummy;
02231       QString basePath = base_url.directory(false, false);
02232       relURL = encode( _relativePath(basePath, url.path(), dummy), false, encoding_hint);
02233       relURL += url.query();
02234    }
02235 
02236    if ( url.hasRef() )
02237    {
02238       relURL += "#";
02239       relURL += url.ref();
02240    }
02241 
02242    if ( relURL.isEmpty() )
02243       return "./";
02244 
02245    return relURL;
02246 }
02247 
02248 int KURL::uriMode() const
02249 {
02250   return m_iUriMode;
02251 }
02252 
02253 KURL::URIMode KURL::uriModeForProtocol(const QString& protocol)
02254 {
02255 #ifndef KDE_QT_ONLY
02256     KURL::URIMode mode = Auto;
02257     if (KGlobal::_instance)
02258         mode = KProtocolInfo::uriParseMode(protocol);
02259     if (mode == Auto ) {
02260 #else
02261         KURL::URIMode mode = Auto;
02262 #endif
02263     if ( protocol == "ed2k" || protocol == "sig2dat" || protocol == "slsk" || protocol == "data" ) mode = RawURI;
02264     else if ( protocol == "mailto" ) mode = Mailto;
02265     else mode = URL;
02266 #ifndef KDE_QT_ONLY
02267     }
02268 #endif
02269     return mode;
02270 }
KDE Logo
This file is part of the documentation for kdecore Library Version 3.2.0.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Wed Feb 4 12:33:52 2004 by doxygen 1.2.18 written by Dimitri van Heesch, © 1997-2003