Description


Can we have the ability to test a string against a regular expression?

Use Case


Just like any other programming language, such as .NET / Java.

Importance


Faster manipulation/searching of strings.

Type


Scripting/Coding

Operating System


Not Applicable

Status


Open

5 thoughts on “Support for regular expressions”

  1. Actually, the ‘xercesc270.dll’ library that is delivered with Uniface contains a regular expression parser. Uniface only uses the XML parsing part of Xerces but you can create a 3GL wrapper dll that accesses this functionality and that you can activate using a C signature in Uniface.

    The following is a C++ program for Windows, Visual Studio 2005, that gives you access to Xerces’s regular expressions. It loads all entry points dynamically, so you don’t need any Xerces sources to build it, just the Uniface header files in ...\uniface\3gl\include and the Uniface .libs in ...\uniface\3gl\lib:


    // regex.cpp, start of 3GL code for operations MATCH and REPLACE in C signature REGEX
    #include <h3gl.h>

    // Placeholder class type used in place of the Xerces RegularExpression class,
    // so that the Xerces include files are not needed. It exists only so that
    // pointer-to-member-function typedefs can be declared against it.
    class dummy
    {
    private:
        void func();
    };

    // These typedefs and declarations are needed to dynamically load and access the Regular Expression class methods in Xerces:

    // Handle of the Xerces DLL; stays NULL until initializeRegEx() has loaded the library.
    HMODULE hXerces = NULL;
    // Stand-in type for the memory-manager argument that is passed as the last
    // parameter of the constructor and match calls below.
    typedef void (*MemManager)();
    // Allocator entry point; called with the object size in bytes.
    typedef void* (*RegExNew)(int);
    // Deallocator entry point; called with the object to free.
    // NOTE(review): the export name it is bound to looks like a void-returning operator delete - confirm.
    typedef void* (*RegExDel)(dummy*);
    // Library initialisation entry point; it is called as Initialize("en_US", 0, 0, 0, false).
    typedef void (*Init)(const char* const, const char* const, void*, void*, bool);
    // Constructor taking only a pattern.
    typedef void (dummy::*RegExConstr1)(const UTF16* const pattern, MemManager);
    // Constructor taking a pattern and an options string.
    typedef void (dummy::*RegExConstr2)(const UTF16* const pattern, const UTF16* const options, MemManager);
    // Destructor.
    typedef void (dummy::*RegExDestr)();
    // Member function that tests a value against the expression.
    typedef bool (dummy::*RegExMtch)(const UTF16* const value, MemManager);
    // Member function that returns a string with the replacement applied.
    typedef UTF16* (dummy::*RegExRepl)(const UTF16* const value, const UTF16* const replacewith);

    // Entry points resolved by initializeRegEx(). They are not explicitly
    // initialised here; initializeRegEx() assigns each one from GetProcAddress().
    RegExNew RegExAlloc;
    RegExDel RegExDelete;
    RegExConstr1 RexEgContructor1;
    RegExConstr2 RexEgContructor2;
    RegExDestr RegExDestructor;
    RegExMtch RegExMatch;
    RegExRepl RegExReplace;
    Init Initialize;
    // Address of an exported variable (not a function); it is dereferenced at every call site.
    MemManager *MemoryManager;

    // Converts a value of type SRC to type DST by copying its bits through a union.
    // It is used to turn the generic address returned by GetProcAddress() into the
    // function, member-function and data pointer types declared in this file, which
    // reinterpret_cast refuses to do for pointer-to-member types.
    //
    // DST must be given explicitly, SRC is deduced: cast_it<Init>(GetProcAddress(...)).
    // The caller is responsible for the two types being bit-compatible.
    template <class DST, class SRC>
    DST cast_it(SRC s)
    {
        union
        {
            DST dst;
            SRC src;
        } cast_union;

        // Start from a zero/null DST so that, when SRC is smaller than DST, the part
        // of the result not covered by SRC is not left indeterminate.
        cast_union.dst = DST();
        cast_union.src = s;
        return cast_union.dst;
    }

    // Make sure our function names don't get mangled by declaring them extern "C"
    extern "C"
    {
    bool initializeRegEx(void)
    {
    hXerces = LoadLibrary("xercesc270.dll");
    if (hXerces)
    {
    Initialize = cast_it (GetProcAddress(hXerces, "?Initialize@XMLPlatformUtils@xercesc_2_7@@SAXQBD0QAVPanicHandler@2@QAVMemoryManager@2@_N@Z"));
    MemoryManager = cast_it (GetProcAddress(hXerces, "?fgMemoryManager@XMLPlatformUtils@xercesc_2_7@@2PAVMemoryManager@2@A"));
    RegExAlloc = cast_it (GetProcAddress(hXerces, "??2XMemory@xercesc_2_7@@SAPAXI@Z"));
    RegExDelete = cast_it (GetProcAddress(hXerces, "??3XMemory@xercesc_2_7@@SAXPAX@Z"));
    RexEgContructor1 = cast_it (GetProcAddress(hXerces, "??0RegularExpression@xercesc_2_7@@QAE@QBGQAVMemoryManager@1@@Z"));
    RexEgContructor2 = cast_it (GetProcAddress(hXerces, "??0RegularExpression@xercesc_2_7@@QAE@QBG0QAVMemoryManager@1@@Z"));
    RegExDestructor = cast_it (GetProcAddress(hXerces, "??1RegularExpression@xercesc_2_7@@QAE@XZ"));
    RegExMatch = cast_it (GetProcAddress(hXerces, "?matches@RegularExpression@xercesc_2_7@@QAE_NQBGQAVMemoryManager@2@@Z"));
    RegExReplace = cast_it (GetProcAddress(hXerces, "?replace@RegularExpression@xercesc_2_7@@QAEPAGQBG0@Z"));

    if (Initialize)
    Initialize("en_US", 0, 0, 0, false);
    return true;
    }
    return false;
    }

    // TODO: unload the library and other cleanup

    // The MATCH operation: tests 'value' against the regular expression 'expression'.
    //
    //   value      - string to test; must not be NULL (it may be empty)
    //   expression - regular expression; must not be NULL or empty
    //   option     - option string for the expression; NULL or empty selects the
    //                constructor without options
    //
    // Returns  1     when the value matches,
    //          0     when it does not match,
    //         -1113  when a required argument is missing or the expression is
    //                rejected while the regular expression object is constructed,
    //         -150   when Xerces could not be loaded, an entry point is missing,
    //                the object could not be allocated, or matching itself failed.
    //
    // No exception is allowed to leave this function, because it is called through
    // a C interface. Compile with /EHsc so the handler is fully enabled.
    EXPORT long match(UTF16 *value, UTF16 *expression, UTF16 *option)
    {
        if (!expression || !*expression || !value)
            return -1113;

        if (!hXerces && !initializeRegEx())
            return -150;

        // Never call through an entry point that was not resolved.
        if (!RegExAlloc || !RegExDelete || !RexEgContructor1 || !RexEgContructor2 ||
            !RegExDestructor || !RegExMatch || !MemoryManager)
            return -150;

        // new: sizeof(XERCES_CPP_NAMESPACE::RegularExpression) is 60
        dummy *re = static_cast<dummy*>(RegExAlloc(60));
        if (re == NULL) // check BEFORE the constructor is run on this memory
            return -150;

        long result;
        bool constructed = false;
        try
        {
            if (option && *option)
                (re->*RexEgContructor2)(expression, option, *MemoryManager); // constructor 2 of RegularExpression
            else
                (re->*RexEgContructor1)(expression, *MemoryManager); // constructor 1 of RegularExpression
            constructed = true;

            result = (re->*RegExMatch)(value, *MemoryManager) ? 1 : 0; // re->matches(...)
        }
        catch (...)
        {
            // Failure during construction: report it like a bad expression.
            // Failure afterwards: report it as a processing error.
            result = constructed ? -150 : -1113;
        }

        // Only a fully constructed object may be destructed; the memory is freed either way.
        if (constructed)
            (re->*RegExDestructor)();
        RegExDelete(re);
        return result;
    }

    // The REPLACE operation: applies the regular expression 'expression' to 'value'
    // and substitutes 'replacewith', writing the outcome back into 'value'.
    //
    //   value       - in/out string; must not be NULL
    //   expression  - regular expression; must not be NULL or empty
    //   replacewith - replacement text; must not be NULL (it may be empty)
    //   option      - option string for the expression; NULL or empty selects the
    //                 constructor without options
    //
    // Returns  1     when the replacement produced a different string; 'value' then
    //                holds the new string,
    //          0     when the result is identical to the input (or no result was
    //                returned); 'value' is then set to the empty string,
    //         -1113  when a required argument is missing or the expression is
    //                rejected while the regular expression object is constructed
    //                ('value' is left untouched),
    //         -150   when Xerces could not be loaded, an entry point is missing,
    //                the object could not be allocated, or the replace call failed
    //                ('value' is left untouched).
    //
    // No exception is allowed to leave this function, because it is called through
    // a C interface. Compile with /EHsc so the handler is fully enabled.
    EXPORT long replace(UTF16 *value, UTF16 *expression, UTF16 *replacewith, UTF16 *option)
    {
        if (!expression || !*expression || !value || !replacewith)
            return -1113;

        if (!hXerces && !initializeRegEx())
            return -150;

        // Never call through an entry point that was not resolved.
        if (!RegExAlloc || !RegExDelete || !RexEgContructor1 || !RexEgContructor2 ||
            !RegExDestructor || !RegExReplace || !MemoryManager)
            return -150;

        // new: sizeof(XERCES_CPP_NAMESPACE::RegularExpression) is 60
        dummy *re = static_cast<dummy*>(RegExAlloc(60));
        if (re == NULL) // check BEFORE the constructor is run on this memory
            return -150;

        long result;
        bool constructed = false;
        try
        {
            if (option && *option)
                (re->*RexEgContructor2)(expression, option, *MemoryManager); // constructor 2 of RegularExpression
            else
                (re->*RexEgContructor1)(expression, *MemoryManager); // constructor 1 of RegularExpression
            constructed = true;

            // NOTE(review): the returned string is presumably allocated by Xerces and is
            // never released here - confirm ownership and load a release entry point if so.
            UTF16 *val = (re->*RegExReplace)(value, replacewith); // re->replace(...)
            if (val && wcscmp(val, value) != 0)
            {
                // NOTE(review): nothing here knows the capacity of 'value'; a result longer
                // than the caller's buffer would overflow it - confirm the size guaranteed
                // by the C signature.
                wcscpy(value, val);
                result = 1; // match
            }
            else
            {
                *value = L'\0';
                result = 0; // no match
            }
        }
        catch (...)
        {
            // Failure during construction: report it like a bad expression.
            // Failure afterwards: report it as a processing error.
            result = constructed ? -150 : -1113;
        }

        // Only a fully constructed object may be destructed; the memory is freed either way.
        if (constructed)
            (re->*RegExDestructor)();
        RegExDelete(re);
        return result;
    }

    } // extern "C"


    To build it use this batch script:

    REM Builds regex.dll from regex.cpp with the Microsoft compiler and linker.
    REM UF is the Uniface 3gl directory; replace the ellipsis with the real installation path.
    set UF=…\uniface\3gl
    REM /Zi : debuggable
    REM /Zc:wchar_t- : wchar_t is a typedef for unsigned short instead of a built-in type
    REM (presumably so that UTF16* can be passed to wcscmp/wcscpy - confirm against h3gl.h)
    cl /c /Zi /Zc:wchar_t- /I%UF%\include regex.cpp
    REM /debug : debuggable dll for REGEX:
    link /dll /debug /out:regex.dll regex.obj %UF%\lib\ucall.lib %UF%\lib\yrtl.lib user32.lib

    In your assignment file add regex.dll to the [USER_3GL] section.

    Create a C signature called REGEX with two operations:
    MATCH(string value:IN, string expression:IN, string option:IN)
    REPLACE(string value:INOUT, string expression:IN, string replacement:IN, string option:IN)
    Make sure to set ‘Character set used’ to UTF16.

    Now you can make these entries:

    ; Wrapper entry: activates the MATCH operation of the C signature REGEX with
    ; the value to test, the regular expression and the option string.
    ; NOTE(review): there is no explicit return statement, while the usage example
    ; tests the result with "> 0" - confirm that the result of the activate is
    ; what this entry returns.
    entry match
    returns boolean
    params
    string rvalue :in
    string rexpr :in
    string roption:in
    endparams

    activate "REGEX".MATCH(rvalue, rexpr, roption)
    end;

    ; Wrapper entry: activates the REPLACE operation of the C signature REGEX and
    ; returns rvalue afterwards. The value is declared INOUT in the signature, so
    ; rvalue holds whatever REPLACE wrote back.
    entry subst
    returns string
    params
    string rvalue :in
    string rexpr :in
    string rreplac:in
    string roption:in
    endparams

    activate "REGEX".REPLACE(rvalue, rexpr, rreplac, roption)
    return rvalue
    end;


    Example usage of these entries:

    if (match(VALUE, "^a.*b$", "i") > 0) ; see if VALUE starts with a and ends with b, case-insensitive
    message " Match !!"
    else
    message " No match!!"
    endif

    $1 = subst("abcdefg", "b.*f", "Q", "")
    ; $1 is now "aQg"

  2. The code above got mangled a bit by posting it here. The #include line is missing “h3gl.h”, some long lines are wrapped, and leading spaces were removed. You’ll need to correct this when you copy and paste it.

Leave a Reply