Description
Can we have the ability to test a string against a regular expression?
Use Case
Just like in other programming languages/platforms, such as .NET / Java.
Importance
Faster manipulation/searching of strings.
Type
Scripting/Coding
Operating System
Not Applicable
Status
Open
Duplicated wish.
Actually, the ‘xercesc270.dll’ library that is delivered with Uniface contains a regular expression parser. Uniface only uses the XML parsing part of Xerces but you can create a 3GL wrapper dll that accesses this functionality and that you can activate using a C signature in Uniface.
The following is a C++ program for Windows, Visual Studio 2005, that gives you access to Xerces’s regular expressions. It loads all entry points dynamically, so you don’t need any Xerces sources to build it, just the Uniface .libs in
...\uniface\3gl\lib
and the header files in
...\uniface\3gl\include
The code:
// regex.cpp, start of 3GL code for operations MATCH and REPLACE in C signature REGEX
#include <h3gl.h>
// Placeholder class: it gives the pointer-to-member typedefs used for the
// Xerces RegularExpression methods a class type to attach to, so that none
// of the Xerces include files are needed.
class dummy
{
private:
    void func();    // placeholder member declaration
};
// Types and globals needed to load the Xerces regular-expression entry points
// dynamically. Every typedef describes the shape of one entry point; the
// global declared next to it receives that entry point's address in
// initializeRegEx().

// Handle of the Xerces DLL; stays NULL until LoadLibrary has succeeded.
HMODULE hXerces = NULL;

// MemManager stands in for a Xerces memory-manager pointer; MemoryManager
// holds the address of the exported XMLPlatformUtils::fgMemoryManager variable.
typedef void (*MemManager)();
MemManager *MemoryManager;

// XMLPlatformUtils::Initialize
typedef void (*Init)(const char* const, const char* const, void*, void*, bool);
Init Initialize;

// XMemory operator new
typedef void* (*RegExNew)(int);
RegExNew RegExAlloc;

// XMemory operator delete
typedef void* (*RegExDel)(dummy*);
RegExDel RegExDelete;

// RegularExpression constructor taking a pattern
typedef void (dummy::*RegExConstr1)(const UTF16* const pattern, MemManager);
RegExConstr1 RexEgContructor1;

// RegularExpression constructor taking a pattern and an option string
typedef void (dummy::*RegExConstr2)(const UTF16* const pattern, const UTF16* const options, MemManager);
RegExConstr2 RexEgContructor2;

// RegularExpression destructor
typedef void (dummy::*RegExDestr)();
RegExDestr RegExDestructor;

// RegularExpression::matches
typedef bool (dummy::*RegExMtch)(const UTF16* const value, MemManager);
RegExMtch RegExMatch;

// RegularExpression::replace
typedef UTF16* (dummy::*RegExRepl)(const UTF16* const value, const UTF16* const replacewith);
RegExRepl RegExReplace;
// Converts a value of type SRC into a value of type DST by storing it in one
// member of a union and reading it back through the other member. This is
// what lets an address obtained as a plain procedure pointer be assigned to
// function, member-function and data pointer variables, which
// reinterpret_cast does not allow.
// Call it as cast_it<DST>(value); SRC is deduced from the argument.
// NOTE(review): only meaningful when DST and SRC have the same size and
// representation (true for the pointer types used here with the 32-bit
// Visual C++ compiler this file targets -- confirm before porting).
template <typename DST, typename SRC>
DST cast_it(SRC s)
{
    union
    {
        DST dst;
        SRC src;
    } cast_union;
    cast_union.src = s;
    return cast_union.dst;
}
// Make sure our function names don't get mangled by declaring them extern "C" (GetProcAddress(hXerces, "?Initialize@XMLPlatformUtils@xercesc_2_7@@SAXQBD0QAVPanicHandler@2@QAVMemoryManager@2@_N@Z")); (GetProcAddress(hXerces, "?fgMemoryManager@XMLPlatformUtils@xercesc_2_7@@2PAVMemoryManager@2@A")); (GetProcAddress(hXerces, "??2XMemory@xercesc_2_7@@SAPAXI@Z")); (GetProcAddress(hXerces, "??3XMemory@xercesc_2_7@@SAXPAX@Z")); (GetProcAddress(hXerces, "??0RegularExpression@xercesc_2_7@@QAE@QBGQAVMemoryManager@1@@Z")); (GetProcAddress(hXerces, "??0RegularExpression@xercesc_2_7@@QAE@QBG0QAVMemoryManager@1@@Z")); (GetProcAddress(hXerces, "??1RegularExpression@xercesc_2_7@@QAE@XZ")); (GetProcAddress(hXerces, "?matches@RegularExpression@xercesc_2_7@@QAE_NQBGQAVMemoryManager@2@@Z")); (GetProcAddress(hXerces, "?replace@RegularExpression@xercesc_2_7@@QAEPAGQBG0@Z"));
extern "C"
{
bool initializeRegEx(void)
{
hXerces = LoadLibrary("xercesc270.dll");
if (hXerces)
{
Initialize = cast_it
MemoryManager = cast_it
RegExAlloc = cast_it
RegExDelete = cast_it
RexEgContructor1 = cast_it
RexEgContructor2 = cast_it
RegExDestructor = cast_it
RegExMatch = cast_it
RegExReplace = cast_it
if (Initialize)
Initialize("en_US", 0, 0, 0, false);
return true;
}
return false;
}
// TODO: unload the library and other cleanup
// The MATCH operation: tests 'value' against the regular expression
// 'expression'. A non-empty 'option' string is handed to the two-argument
// RegularExpression constructor.
// Returns:
//      1     value matches
//      0     value does not match
//  -1113     expression is NULL or empty
//   -150     Xerces could not be initialized, or no storage could be
//            allocated for the RegularExpression object
// NOTE(review): Xerces may signal an invalid pattern with a C++ exception;
// nothing here catches one -- confirm how that should surface to the caller.
EXPORT long match(UTF16 *value, UTF16 *expression, UTF16 *option)
{
    if (!expression || !*expression)
        return -1113;
    if (!hXerces && !initializeRegEx())
        return -150;

    // Equivalent of "new RegularExpression(...)": raw storage first, then an
    // explicit constructor call. 60 is sizeof(XERCES_CPP_NAMESPACE::RegularExpression).
    dummy *re = static_cast<dummy*>(RegExAlloc(60));
    if (re == NULL)
        return -150;    // must be checked before a constructor runs on 're'

    if (option && *option)
        (re->*RexEgContructor2)(expression, option, *MemoryManager); // constructor 2 of RegularExpression
    else
        (re->*RexEgContructor1)(expression, *MemoryManager);         // constructor 1 of RegularExpression

    // re->matches(...)
    const long result = (re->*RegExMatch)(value, *MemoryManager) ? 1 : 0;

    // Equivalent of "delete re": explicit destructor call, then release the storage.
    (re->*RegExDestructor)();
    RegExDelete(re);
    return result;
}
// The REPLACE operation: runs RegularExpression::replace on 'value' with the
// regular expression 'expression' and the replacement text 'replacewith'.
// A non-empty 'option' string is handed to the two-argument constructor.
// 'value' is both input and output.
// Returns:
//      1     the replaced text differs from the input; it has been copied
//            into 'value'
//      0     replace returned NULL or text equal to the input; 'value' has
//            been set to the empty string
//  -1113     expression is NULL or empty
//   -150     Xerces could not be initialized, or no storage could be
//            allocated for the RegularExpression object
// NOTE(review): wcscpy assumes the caller's 'value' buffer is large enough
// for the replaced text, and the string returned by replace is never
// released here -- confirm both against the Uniface 3GL and Xerces contracts.
EXPORT long replace(UTF16 *value, UTF16 *expression, UTF16 *replacewith, UTF16 *option)
{
    if (!expression || !*expression)
        return -1113;
    if (!hXerces && !initializeRegEx())
        return -150;

    // Equivalent of "new RegularExpression(...)": raw storage first, then an
    // explicit constructor call. 60 is sizeof(XERCES_CPP_NAMESPACE::RegularExpression).
    dummy *re = static_cast<dummy*>(RegExAlloc(60));
    if (re == NULL)
        return -150;    // must be checked before a constructor runs on 're'

    if (option && *option)
        (re->*RexEgContructor2)(expression, option, *MemoryManager); // constructor 2 of RegularExpression
    else
        (re->*RexEgContructor1)(expression, *MemoryManager);         // constructor 1 of RegularExpression

    long result;
    UTF16 *val = (re->*RegExReplace)(value, replacewith); // re->replace(...)
    if (val && wcscmp(val, value) != 0)
    {
        wcscpy(value, val);
        result = 1; // match
    }
    else
    {
        *value = L'\0';
        result = 0; // no match
    }

    // Equivalent of "delete re": explicit destructor call, then release the storage.
    (re->*RegExDestructor)();
    RegExDelete(re);
    return result;
}
} // extern "C"
To build it use this batch script:
REM Builds regex.dll from regex.cpp.
REM UF is the Uniface 3GL directory; its include and lib subdirectories are used below.
set UF=…\uniface\3gl
REM /c : compile only, linking is done by the link command below
REM /Zi : debuggable
REM /Zc:wchar_t- : wchar_t is not a built-in type (presumably so that UTF16 strings can be passed to wcscmp/wcscpy - confirm)
cl /c /Zi /Zc:wchar_t- /I%UF%\include regex.cpp
REM /debug : debuggable dll for REGEX:
link /dll /debug /out:regex.dll regex.obj %UF%\lib\ucall.lib %UF%\lib\yrtl.lib user32.lib
In your assignment file add regex.dll to the [USER_3GL] section.
Create a C signature called REGEX with two operations:
MATCH(string value:IN, string expression:IN, string option:IN)
REPLACE(string value:INOUT, string expression:IN, string replacement:IN, string option:IN)
Make sure to set ‘Character set used’ to UTF16.
Now you can make these entries:
; Wrapper entry around operation MATCH of C signature REGEX.
;   rvalue  : the string to test
;   rexpr   : the regular expression
;   roption : option string passed on to the operation
; NOTE(review): there is no explicit return statement in this entry; confirm
; that callers of match() actually receive the status set by the activate.
entry match
returns boolean
params
string rvalue :in
string rexpr :in
string roption:in
endparams
activate "REGEX".MATCH(rvalue, rexpr, roption)
end;
; Wrapper entry around operation REPLACE of C signature REGEX.
;   rvalue  : the string to work on; passed as the value argument of REPLACE
;   rexpr   : the regular expression
;   rreplac : the replacement text
;   roption : option string passed on to the operation
; Returns rvalue as it is after the activate.
entry subst
returns string
params
string rvalue :in
string rexpr :in
string rreplac:in
string roption:in
endparams
activate "REGEX".REPLACE(rvalue, rexpr, rreplac, roption)
return rvalue
end;
Example usage of these entries:
if (match(VALUE, "^a.*b$", "i") > 0) ; see if VALUE starts with a and ends with b, case-insensitive
message " Match !!"
else
message " No match!!"
endif
$1 = subst("abcdefg", "b.*f", "Q", "")
; $1 is now "aQg"
Documentation of Xerces regular expressions is here: https://xerces.apache.org/xerces2-j/javadocs/xerces2/org/apache/xerces/impl/xpath/regex/RegularExpression.html
The code above got mangled a bit by posting it here. The #include line is missing “h3gl.h”, some long lines are wrapped, and leading spaces were removed. You’ll need to correct this when you copy and paste it.
Could you repost the corrected code?