Codebase list ruby-fxruby / ddfc60e fox-includes / FXRex.h
ddfc60e

Tree @ddfc60e (Download .tar.gz)

FXRex.h @ddfc60eraw · history · blame

/********************************************************************************
*                                                                               *
*                 R e g u l a r   E x p r e s s i o n   C l a s s               *
*                                                                               *
*********************************************************************************
* Copyright (C) 1999,2006 by Jeroen van der Zijp.   All Rights Reserved.        *
*********************************************************************************
* This library is free software; you can redistribute it and/or                 *
* modify it under the terms of the GNU Lesser General Public                    *
* License as published by the Free Software Foundation; either                  *
* version 2.1 of the License, or (at your option) any later version.            *
*                                                                               *
* This library is distributed in the hope that it will be useful,               *
* but WITHOUT ANY WARRANTY; without even the implied warranty of                *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU             *
* Lesser General Public License for more details.                               *
*                                                                               *
* You should have received a copy of the GNU Lesser General Public              *
* License along with this library; if not, write to the Free Software           *
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.    *
*********************************************************************************
* $Id: FXRex.h 2343 2006-02-12 20:26:26Z lyle $                             *
********************************************************************************/
#ifndef FXREX_H
#define FXREX_H


namespace FX {


/// Error codes reported when parsing a regular expression pattern.
/// The numeric values double as indices into the error message table
/// consulted by FXRex::getError(), so they are spelled out explicitly.
enum FXRexError {
  REGERR_OK      = 0,       ///< OK; presumably signals that no error occurred
  REGERR_EMPTY   = 1,       ///< Empty pattern
  REGERR_PAREN   = 2,       ///< Unmatched parenthesis
  REGERR_BRACK   = 3,       ///< Unmatched bracket
  REGERR_BRACE   = 4,       ///< Unmatched brace
  REGERR_RANGE   = 5,       ///< Bad character range
  REGERR_ESC     = 6,       ///< Bad escape sequence
  REGERR_COUNT   = 7,       ///< Bad counted repeat
  REGERR_NOATOM  = 8,       ///< No atom preceding repetition
  REGERR_REPEAT  = 9,       ///< Repeat following repeat
  REGERR_BACKREF = 10,      ///< Bad backward reference
  REGERR_CLASS   = 11,      ///< Bad character class
  REGERR_COMPLEX = 12,      ///< Expression too complex
  REGERR_MEMORY  = 13,      ///< Out of memory
  REGERR_TOKEN   = 14       ///< Illegal token
  };


/// Regular expression parse flags
enum {
  REX_NORMAL    = 0,        ///< Normal mode (no parse flag set)
  REX_CAPTURE   = 1 << 0,   ///< Perform capturing parentheses
  REX_ICASE     = 1 << 1,   ///< Case independent matching
  REX_NEWLINE   = 1 << 2,   ///< Match-any operators match newline too
  REX_VERBATIM  = 1 << 3,   ///< Disable interpretation of magic characters
  REX_SYNTAX    = 1 << 4    ///< Perform syntax check only
  };


/// Regular expression match flags
enum {
  REX_FORWARD   = 0,        ///< Match scanning forward from offset
  REX_BACKWARD  = 1 << 5,   ///< Match scanning backward from offset
  REX_NOT_BOL   = 1 << 6,   ///< Start of string is NOT begin of line
  REX_NOT_EOL   = 1 << 7,   ///< End of string is NOT end of line
  REX_NOT_EMPTY = 1 << 8    ///< Do not match empty
  };


/**
* FXRex is a regular expression class implementing an NFA matcher.
* It supports capturing parentheses, non-capturing parentheses,
* positive or negative lookahead, backreferences, case-insensitive
* matching, counted repetitions, lazy or greedy matches, and
* PERL-like matching operators.
* The subject string may be scanned forwards or backwards, and may
* contain any of 256 possible character values.
*
* When parsing a regular expression pattern, the mode parameter is
* the bitwise OR of a set of flags and affects the match algorithm.
* Passing the flag REX_CAPTURE enables capturing parentheses
* and back references. The flag REX_ICASE enables case-insensitive
* matching. When the flag REX_NEWLINE is passed, newlines are treated
* like normal characters; otherwise, newline is NOT matched
* except when explicitly part of a character class. The flag
* REX_VERBATIM disables all special character interpretation.
* The flag REX_SYNTAX requests a syntax check of the pattern only.
*
* When matching a compiled pattern, the mode parameter is the
* bitwise OR of a set of flags that affects how the match is
* performed.  Passing the flag REX_BACKWARD causes the match
* to proceed backwards through the subject string.  Passing the
* flags REX_NOT_BOL and/or REX_NOT_EOL causes the begin and
* end of the subject string NOT to be considered a line start
* or line end. The flag REX_NOT_EMPTY causes a match to fail if
* the empty string was matched.
*/
class FXAPI FXRex {
private:
  FXint *code;                              // Presumably the compiled pattern; equals fallback while the object is empty
private:
  static const FXchar *const errors[];      // Message table indexed by FXRexError (see getError)
  static const FXint fallback[];            // Shared placeholder that code points at for an empty object
public:

  /// Construct empty regular expression object
  FXRex():code(const_cast<FXint*>(fallback)){}

  /// Copy regular expression object
  FXRex(const FXRex& orig);

  /// Compile expression from pattern; if error is not NULL, error code is returned
  FXRex(const FXchar* pattern,FXint mode=REX_NORMAL,FXRexError* error=NULL);

  /// Compile expression from pattern; if error is not NULL, error code is returned
  FXRex(const FXString& pattern,FXint mode=REX_NORMAL,FXRexError* error=NULL);

  /// Assign another regular expression to this one
  FXRex& operator=(const FXRex& orig);

  /**
  * See if regular expression is empty; the regular expression
  * will be empty when it is unable to parse a pattern due to
  * a syntax error.  A default-constructed object is empty as well.
  */
  bool empty() const { return (code==fallback); }

  /// Parse pattern, return error code if syntax error is found
  FXRexError parse(const FXchar* pattern,FXint mode=REX_NORMAL);

  /// Parse pattern, return error code if syntax error is found
  FXRexError parse(const FXString& pattern,FXint mode=REX_NORMAL);

  /**
  * Match a subject string of length len, returning true if a match is found
  * and false otherwise.  The entire pattern is captured in beg[0] and end[0],
  * where beg[0] refers to the position of the first matched character and end[0]
  * refers to the position after the last matched character.
  * Sub expressions from capturing parenthesis i are returned in beg[i] and end[i].
  * The mode parameter is a bitwise OR of the match flags (REX_FORWARD etc.).
  * NOTE(review): npar presumably gives the number of beg/end entries available,
  * and fm/to presumably bound the offsets considered -- confirm in FXRex.cpp.
  */
  bool match(const FXchar* string,FXint len,FXint* beg=NULL,FXint* end=NULL,FXint mode=REX_FORWARD,FXint npar=1,FXint fm=0,FXint to=2147483647) const;

  /// Search for match in a string
  bool match(const FXString& string,FXint* beg=NULL,FXint* end=NULL,FXint mode=REX_FORWARD,FXint npar=1,FXint fm=0,FXint to=2147483647) const;

  /**
  * After performing a regular expression match with capturing parentheses,
  * a substitution string is built from the replace string, where "&"
  * is replaced by the entire matched pattern, and "\1" through "\9" are
  * replaced by captured expressions.  The original source string and its
  * length, and the match arrays beg and end must be passed.
  */
  static FXString substitute(const FXchar* string,FXint len,FXint* beg,FXint* end,const FXString& replace,FXint npar=1);

  /// Return substitution string
  static FXString substitute(const FXString& string,FXint* beg,FXint* end,const FXString& replace,FXint npar=1);

  /**
  * Return the message text for the given error code.
  * Codes outside REGERR_OK..REGERR_TOKEN yield a generic message rather
  * than reading past the end of the message table.  The unsigned comparison
  * rejects negative values too.  REGERR_TOKEN must remain the last
  * enumerator of FXRexError for this check to stay correct; this assumes
  * the table holds one entry per enumerator.
  */
  static const FXchar* getError(FXRexError err){
    return (static_cast<unsigned int>(err)<=static_cast<unsigned int>(REGERR_TOKEN)) ? errors[err] : "Unknown error";
    }

  /// Comparison operators
  bool operator==(const FXRex& rex) const;
  bool operator!=(const FXRex& rex) const;

  /// Saving and loading
  friend FXAPI FXStream& operator<<(FXStream& store,const FXRex& s);
  friend FXAPI FXStream& operator>>(FXStream& store,FXRex& s);

  /// Delete
 ~FXRex();
  };


// Namespace-scope declarations of the stream operators that FXRex declares
// as friends under "Saving and loading": operator<< takes the expression by
// const reference, operator>> takes it by mutable reference.
extern FXAPI FXStream& operator<<(FXStream& store,const FXRex& s);
extern FXAPI FXStream& operator>>(FXStream& store,FXRex& s);

}

#endif