Logo Search packages:      
Sourcecode: poco version File versions  Download package

RegularExpression.h

//
// RegularExpression.h
//
// $Id: //poco/1.3/Foundation/include/Poco/RegularExpression.h#2 $
//
// Library: Foundation
// Package: RegExp
// Module:  RegularExpression
//
// Definitions of class RegularExpression.
//
// A wrapper class for Philip Hazel's PCRE - Perl Compatible Regular Expressions
// library (http://www.pcre.org).
//
// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// Permission is hereby granted, free of charge, to any person or organization
// obtaining a copy of the software and accompanying documentation covered by
// this license (the "Software") to use, reproduce, display, distribute,
// execute, and transmit the Software, and to prepare derivative works of the
// Software, and to permit third-parties to whom the Software is furnished to
// do so, all subject to the following:
// 
// The copyright notices in the Software and this entire statement, including
// the above license grant, this restriction and the following disclaimer,
// must be included in all copies of the Software, in whole or in part, and
// all derivative works of the Software, unless such copies or derivative
// works are solely in the form of machine-executable object code generated by
// a source language processor.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//


#ifndef Foundation_RegularExpression_INCLUDED
#define Foundation_RegularExpression_INCLUDED


#include "Poco/Foundation.h"
#include <vector>


//
// Copy these definitions from pcre.h
// to avoid pulling in the entire header file
//
extern "C"
{
      struct real_pcre;
      typedef struct real_pcre pcre;
      struct pcre_extra;
}


namespace Poco {


00065 class Foundation_API RegularExpression
      /// A class for working with regular expressions.
      /// Implemented using PCRE, the Perl Compatible
      /// Regular Expressions library by Philip Hazel
      /// (see http://www.pcre.org).
{
public:
00072       enum Options // These must match the corresponsing options in pcre.h!
            /// Some of the following options can only be passed to the constructor;
            /// some can be passed only to matching functions, and some can be used
            /// everywhere.
            /// 
            ///   * Options marked [ctor] can be passed to the constructor.
            ///   * Options marked [match] can be passed to match, extract, split and subst.
            ///   * Options marked [subst] can be passed to subst.
            ///
            /// See the PCRE documentation for more information.
      {
            RE_CASELESS        = 0x00000001, /// case insensitive matching (/i) [ctor]
00084             RE_MULTILINE       = 0x00000002, /// enable multi-line mode; affects ^ and $ (/m) [ctor]
00085             RE_DOTALL          = 0x00000004, /// dot matches all characters, including newline (/s) [ctor]
00086             RE_EXTENDED        = 0x00000004, /// totally ignore whitespace (/x) [ctor]
00087             RE_ANCHORED        = 0x00000010, /// treat pattern as if it starts with a ^ [ctor, match]
00088             RE_DOLLAR_ENDONLY  = 0x00000020, /// dollar matches end-of-string only, not last newline in string [ctor]
00089             RE_EXTRA           = 0x00000040, /// enable optional PCRE functionality [ctor]
00090             RE_NOTBOL          = 0x00000080, /// circumflex does not match beginning of string [match]
00091             RE_NOTEOL          = 0x00000100, /// $ does not match end of string [match]
00092             RE_UNGREEDY        = 0x00000200, /// make quantifiers ungreedy [ctor]
00093             RE_NOTEMPTY        = 0x00000400, /// empty string never matches [match]
00094             RE_UTF8            = 0x00000800, /// assume pattern and subject is UTF-8 encoded [ctor]
00095             RE_NO_AUTO_CAPTURE = 0x00001000, /// disable numbered capturing parentheses [ctor, match]
00096             RE_NO_UTF8_CHECK   = 0x00002000, /// do not check validity of UTF-8 code sequences [match]
00097             RE_FIRSTLINE       = 0x00040000, /// an  unanchored  pattern  is  required  to  match
                                             /// before  or  at  the  first  newline  in  the subject string, 
                                             /// though the matched text may continue over the newline [ctor]
00100             RE_DUPNAMES        = 0x00080000, /// names used to identify capturing  subpatterns  need not be unique [ctor]
00101             RE_NEWLINE_CR      = 0x00100000, /// assume newline is CR ('\r'), the default [ctor] 
00102             RE_NEWLINE_LF      = 0x00200000, /// assume newline is LF ('\n') [ctor]
00103             RE_NEWLINE_CRLF    = 0x00300000, /// assume newline is CRLF ("\r\n") [ctor]
00104             RE_NEWLINE_ANY     = 0x00400000, /// assume newline is any valid Unicode newline character [ctor]
00105             RE_NEWLINE_ANYCRLF = 0x00500000, /// assume newline is any of CR, LF, CRLF [ctor]
00106             RE_GLOBAL          = 0x10000000, /// replace all occurences (/g) [subst]
00107             RE_NO_VARS         = 0x20000000  /// treat dollar in replacement string as ordinary character [subst]
      };
      
      struct Match
      {
            std::string::size_type offset; /// zero based offset (std::string::npos if subexpr does not match)
            std::string::size_type length; /// length of substring
      };
      typedef std::vector<Match> MatchVec;
      
      RegularExpression(const std::string& pattern, int options = 0, bool study = true);
            /// Creates a regular expression and parses the given pattern.
            /// If study is true, the pattern is analyzed and optimized. This
            /// is mainly useful if the pattern is used more than once.
            /// For a description of the options, please see the PCRE documentation.
            /// Throws a RegularExpressionException if the patter cannot be compiled.
            
      ~RegularExpression();
            /// Destroys the regular expression.

      int match(const std::string& subject, Match& mtch, int options = 0) const;
            /// Matches the given subject string against the pattern. Returns the position
            /// of the first captured substring in mtch.
            /// If no part of the subject matches the pattern, mtch.offset is std::string::npos and
            /// mtch.length is 0.
            /// Throws a RegularExpressionException in case of an error.
            /// Returns the number of matches.

      int match(const std::string& subject, std::string::size_type offset, Match& mtch, int options = 0) const;
            /// Matches the given subject string, starting at offset, against the pattern. 
            /// Returns the position of the captured substring in mtch.
            /// If no part of the subject matches the pattern, mtch.offset is std::string::npos and
            /// mtch.length is 0.
            /// Throws a RegularExpressionException in case of an error.
            /// Returns the number of matches.

      int match(const std::string& subject, std::string::size_type offset, MatchVec& matches, int options = 0) const;
            /// Matches the given subject string against the pattern. 
            /// The first entry in matches contains the position of the captured substring.
            /// The following entries identify matching subpatterns. See the PCRE documentation
            /// for a more detailed explanation.
            /// If no part of the subject matches the pattern, matches is empty.
            /// Throws a RegularExpressionException in case of an error.
            /// Returns the number of matches.

      bool match(const std::string& subject, std::string::size_type offset = 0) const;
            /// Returns true if and only if the subject matches the regular expression.
            ///
            /// Internally, this method sets the RE_ANCHORED and RE_NOTEMPTY options for
            /// matching, which means that the empty string will never match and
            /// the pattern is treated as if it starts with a ^.

      bool match(const std::string& subject, std::string::size_type offset, int options) const;
            /// Returns true if and only if the subject matches the regular expression.

      bool operator == (const std::string& subject) const;
            /// Returns true if and only if the subject matches the regular expression.
            ///
            /// Internally, this method sets the RE_ANCHORED and RE_NOTEMPTY options for
            /// matching, which means that the empty string will never match and
            /// the pattern is treated as if it starts with a ^.

      bool operator != (const std::string& subject) const;
            /// Returns true if and only if the subject does not match the regular expression.
            ///
            /// Internally, this method sets the RE_ANCHORED and RE_NOTEMPTY options for
            /// matching, which means that the empty string will never match and
            /// the pattern is treated as if it starts with a ^.

      int extract(const std::string& subject, std::string& str, int options = 0) const;
            /// Matches the given subject string against the pattern. 
            /// Returns the captured string.
            /// Throws a RegularExpressionException in case of an error.
            /// Returns the number of matches.

      int extract(const std::string& subject, std::string::size_type offset, std::string& str, int options = 0) const;
            /// Matches the given subject string, starting at offset, against the pattern. 
            /// Returns the captured string.
            /// Throws a RegularExpressionException in case of an error.
            /// Returns the number of matches.

      int split(const std::string& subject, std::vector<std::string>& strings, int options = 0) const;
            /// Matches the given subject string against the pattern. 
            /// The first entry in captured is the captured substring.
            /// The following entries contain substrings matching subpatterns. See the PCRE documentation
            /// for a more detailed explanation.
            /// If no part of the subject matches the pattern, captured is empty.
            /// Throws a RegularExpressionException in case of an error.
            /// Returns the number of matches.

      int split(const std::string& subject, std::string::size_type offset, std::vector<std::string>& strings, int options = 0) const;
            /// Matches the given subject string against the pattern. 
            /// The first entry in captured is the captured substring.
            /// The following entries contain substrings matching subpatterns. See the PCRE documentation
            /// for a more detailed explanation.
            /// If no part of the subject matches the pattern, captured is empty.
            /// Throws a RegularExpressionException in case of an error.
            /// Returns the number of matches.
      
      int subst(std::string& subject, const std::string& replacement, int options = 0) const;
            /// Substitute in subject all matches of the pattern with replacement.
            /// If RE_GLOBAL is specified as option, all matches are replaced. Otherwise,
            /// only the first match is replaced.
            /// Occurences of $<n> (for example, $1, $2, ...) in replacement are replaced 
            /// with the corresponding captured string. $0 is the original subject string.
            /// Returns the number of replaced occurences.

      int subst(std::string& subject, std::string::size_type offset, const std::string& replacement, int options = 0) const;
            /// Substitute in subject all matches of the pattern with replacement,
            /// starting at offset.
            /// If RE_GLOBAL is specified as option, all matches are replaced. Otherwise,
            /// only the first match is replaced.
            /// Unless RE_NO_VARS is specified, occurences of $<n> (for example, $0, $1, $2, ... $9) 
            /// in replacement are replaced with the corresponding captured string. 
            /// $0 is the captured substring. $1 ... $n are the substrings maching the subpatterns.
            /// Returns the number of replaced occurences.

      static bool match(const std::string& subject, const std::string& pattern, int options = 0);
            /// Matches the given subject string against the regular expression given in pattern,
            /// using the given options.

protected:
      std::string::size_type substOne(std::string& subject, std::string::size_type offset, const std::string& replacement, int options) const;

private:
      pcre*       _pcre;
      pcre_extra* _extra;
      
      static const int OVEC_SIZE;
      
      RegularExpression();
      RegularExpression(const RegularExpression&);
      RegularExpression& operator = (const RegularExpression&);
};


//
// inlines
//
00246 inline int RegularExpression::match(const std::string& subject, Match& mtch, int options) const
{
      return match(subject, 0, mtch, options);
}


00252 inline int RegularExpression::split(const std::string& subject, std::vector<std::string>& strings, int options) const
{
      return split(subject, 0, strings, options);
}


00258 inline int RegularExpression::subst(std::string& subject, const std::string& replacement, int options) const
{
      return subst(subject, 0, replacement, options);
}


00264 inline bool RegularExpression::operator == (const std::string& subject) const
{
      return match(subject);
}


00270 inline bool RegularExpression::operator != (const std::string& subject) const
{
      return !match(subject);
}


} // namespace Poco


#endif // Foundation_RegularExpression_INCLUDED

Generated by  Doxygen 1.6.0   Back to index