xsharp.eu • FoxPro function list updated - Page 5
Page 5 of 7

FoxPro function list updated

Posted: Thu Feb 13, 2020 11:47 am
by mainhatten
Better check those - except for the __SubStr() topic, which I want to read up on first, all changes are included and everything compiles OK.

FoxPro function list updated

Posted: Thu Feb 13, 2020 11:58 am
by mainhatten
Somehow Attachments don't attach
StringFunctions and Functions inlined

If the fixes for .null. with Stringfunctions are only needed/wanted for vfp (dunno what Clipper does if fed .Null.)
perhaps best if IsNull is implemented ASAP - I don't want to touch that, as I have no idea where to draw the line with DBNull, .Null., Nil, None and so on.
regards
Thomas

Code: Select all

//
// Copyright (c) XSharp B.V.  All Rights Reserved.
// Licensed under the Apache License, Version 2.0.
// See License.txt in the project root for license information.
//

// String Functions


USING System
USING System.Collections.Generic
USING System.Text
USING System.IO


/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/addbs/*" />
/// <seealso cref='M:XSharp.VFP.Functions.DefaultExt(System.String)' />
/// <seealso cref='M:XSharp.VFP.Functions.JustDrive(System.String)' />
/// <seealso cref='M:XSharp.VFP.Functions.JustExt(System.String)' />
/// <seealso cref='M:XSharp.VFP.Functions.JustFName(System.String)' />
/// <seealso cref='M:XSharp.VFP.Functions.JustPath(System.String)' />
/// <seealso cref='M:XSharp.VFP.Functions.JustStem(System.String)' />
FUNCTION AddBs (cPath AS STRING) AS STRING
    // Returns cPath without trailing whitespace and terminated by the platform's
    // directory separator. NULL or empty input yields an empty string.
    IF String.IsNullOrEmpty(cPath)
        RETURN ""
    ENDIF
    LOCAL cSeparator := Path.DirectorySeparatorChar:ToString() AS STRING
    LOCAL cTrimmed := cPath:TrimEnd() AS STRING
    // Nothing to add when the separator is already the last part of the path.
    IF cTrimmed:EndsWith(cSeparator)
        RETURN cTrimmed
    ENDIF
    RETURN cTrimmed + cSeparator


/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/justdrive/*" />
/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/justcommon/*" />
FUNCTION JustDrive(cPath AS STRING) AS STRING
    // Returns the directory root of cPath with every directory separator removed.
    // NULL or empty input yields an empty string.
    IF String.IsNullOrEmpty(cPath)
        RETURN ""
    ENDIF
    LOCAL cRoot := System.IO.Directory.GetDirectoryRoot(cPath) AS STRING
    LOCAL cSeparator := Path.DirectorySeparatorChar:ToString() AS STRING
    RETURN cRoot:Replace(cSeparator, "")


/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/justpath/*" />
/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/justcommon/*" />
FUNCTION JustPath(cPath AS STRING) AS STRING
    // Returns the part of cPath that precedes the last directory separator.
    // NULL or empty input yields an empty string; a value without any separator
    // is returned unchanged.
    IF String.IsNullOrEmpty(cPath)
        RETURN ""
    ENDIF
    LOCAL cPathChar := Path.DirectorySeparatorChar:ToString() AS STRING
    LOCAL nLast := cPath:LastIndexOf(cPathChar) AS INT
    IF nLast < 0
        RETURN cPath
    ENDIF
    // Substring(0, n) takes exactly n characters, so the index of the separator is
    // already the right length. The former "- 1" cut off the last character of the
    // directory and raised ArgumentOutOfRangeException for a leading separator.
    RETURN cPath:Substring(0, nLast)


/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/justfname/*" />
/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/justcommon/*" />
FUNCTION JustFName(cPath AS STRING) AS STRING
    // Returns the file name including its extension; NULL or empty input yields "".
    IF ! String.IsNullOrEmpty(cPath)
        RETURN Path.GetFileName(cPath)
    ENDIF
    RETURN ""


/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/juststem/*" />
/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/justcommon/*" />
FUNCTION JustStem(cPath AS STRING) AS STRING
    // Returns the file name without its extension; NULL or empty input yields "".
    IF ! String.IsNullOrEmpty(cPath)
        RETURN Path.GetFileNameWithoutExtension(cPath)
    ENDIF
    RETURN ""


/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/justext/*" />
/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/justcommon/*" />
FUNCTION JustExt(cPath AS STRING) AS STRING
    // Single-argument form: forwards to the two-argument overload with
    // lOptWithLeadingDot = FALSE. The default stays FALSE because returning every
    // extension with a leading dot could break existing callers.
    LOCAL lWithLeadingDot := FALSE AS LOGIC
    RETURN JustExt(cPath, lWithLeadingDot)

FUNCTION JustExt(cPath AS STRING, lOptWithLeadingDot AS BOOLEAN) AS STRING
    // Returns the extension of cPath. The leading dot is kept only when
    // lOptWithLeadingDot is TRUE. NULL or empty input yields an empty string.
    IF String.IsNullOrEmpty(cPath)
        RETURN ""
    ENDIF
    LOCAL cExt := Path.GetExtension(cPath) AS STRING
    IF lOptWithLeadingDot
        RETURN cExt
    ENDIF
    // Both types are known, so the 0-based String method can drop the dot directly.
    IF cExt:StartsWith(".")
        cExt := cExt:Substring(1)
    ENDIF
    RETURN cExt


/// <summary>-- todo --</summary>
/// <include file="VFPDocs.xml" path="Runtimefunctions/forceext/*" />
FUNCTION ForceExt( cFileName AS STRING, cExtension AS STRING) AS STRING
    // Replaces the extension of cFileName with cExtension using the .NET
    // Path.ChangeExtension rules. NULL or empty cFileName yields an empty string.
    IF ! String.IsNullOrEmpty(cFileName)
        RETURN Path.ChangeExtension(cFileName, cExtension)
    ENDIF
    RETURN ""

FUNCTION ForceExt( cFileName AS STRING, cExtension AS STRING, tlOptAsVfp9 AS BOOLEAN) AS STRING
    // Three-argument form of ForceExt.
    //   tlOptAsVfp9 = FALSE : behaves exactly like the two-argument overload
    //                         (.NET Path.ChangeExtension rules), which is the default.
    //   tlOptAsVfp9 = TRUE  : alternative code path intended to mimic VFP9.
    //                         Work in progress and untested according to the author.
    // Returns "" when the file name or the extension ends up empty.
    IF ! tlOptAsVfp9
        RETURN ForceExt(cFileName, cExtension)
    ENDIF
    LOCAL lcFileName AS STRING
    lcFileName := JustFName(cFileName)
    IF lcFileName:EndsWith(".")
        // Cut only the rightmost dot; a name ending in several dots loses just one.
        lcFileName := __SubStr(lcFileName, 1, lcFileName:Length - 1)
    ENDIF
    LOCAL lcExtension AS STRING
    IF String.IsNullOrEmpty(cExtension)
        // Guard: a NULL extension used to crash on StartsWith().
        lcExtension := ""
    ELSEIF cExtension:StartsWith(".")
        // Strip the leading dot from the EXTENSION. The previous code took the
        // substring of lcFileName here, so the result contained part of the file
        // name instead of the requested extension.
        lcExtension := __SubStr(cExtension, 2, -1)
    ELSE
        lcExtension := cExtension
    ENDIF
    LOCAL lcResult AS STRING
    IF lcFileName:Length > 0 .AND. lcExtension:Length > 0
        lcResult := lcFileName + "." + lcExtension
    ELSE
        lcResult := ""
    ENDIF
    RETURN lcResult

/// <summary>-- todo --</summary>
/// <include file="VFPDocs.xml" path="Runtimefunctions/forcepath/*" />

FUNCTION ForcePath( cFileName AS STRING, cPath AS STRING) AS STRING
    // Combines cPath (terminated with a separator by AddBs) with the bare file
    // name of cFileName. NULL or empty cFileName yields an empty string.
    // Open question from the original author: should cPath be validated as well?
    IF String.IsNullOrEmpty(cFileName)
        RETURN ""
    ENDIF
    LOCAL cFolder := AddBs(cPath) AS STRING
    LOCAL cName := JustFName(cFileName) AS STRING
    RETURN cFolder + cName
 
/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/leftc/*" />
FUNCTION LeftC( cExpression AS STRING, nExpression AS DWORD) AS STRING
    // Forwards to Left() with the same arguments.
    LOCAL cLeft := Left(cExpression, nExpression) AS STRING
    RETURN cLeft

/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/lenc/*" />
FUNCTION LenC( cExpression AS STRING ) AS DWORD
    // Forwards to the typed SLen() function.
    LOCAL nLength := SLen(cExpression) AS DWORD
    RETURN nLength

/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/likec/*" />
FUNCTION LikeC( cExpression1, cExpression2) AS LOGIC
    // Forwards both untyped arguments unchanged to Like().
    LOCAL lMatch := Like(cExpression1, cExpression2) AS LOGIC
    RETURN lMatch


/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/rightc/*" />
FUNCTION RightC( cExpression AS STRING, nCharacters AS DWORD) AS STRING
    // Forwards to Right() with the same arguments.
    LOCAL cRight := Right(cExpression, nCharacters) AS STRING
    RETURN cRight

/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/stuffc/*" />
FUNCTION StuffC( cExpression, nStartReplacement, nCharactersReplaced, cReplacement) AS STRING
    // Forwards all four untyped arguments unchanged to Stuff().
    LOCAL cStuffed := Stuff(cExpression, nStartReplacement, nCharactersReplaced, cReplacement) AS STRING
    RETURN cStuffed
    
/// <include file="VfpRuntimeDocs.xml" path="Runtimefunctions/substrc/*" />
FUNCTION SubStrC(cExpression, nStartPosition , nCharactersReturned ) AS STRING
    // Forwards all three untyped arguments unchanged to SubStr().
    LOCAL cPart := SubStr(cExpression, nStartPosition, nCharactersReturned) AS STRING
    RETURN cPart

Code: Select all

/// <summary>Creates an object from a class definition or an Automation-enabled application.</summary>
/// <param name="cClassName">Specifies the class or OLE object from which the new object is created.</param>
/// <param name="_args">These optional parameters are used to pass values to the Init event procedure for the class.
/// The Init event is executed when you issue CREATEOBJECT( ) and allows you to initialize the object.</param>
/// <returns>The object that was created</returns>
/// <seealso cref='M:XSharp.RT.Functions.CreateInstance(XSharp.__Usual,XSharp.__Usual)' >CreateInstance</seealso>

FUNCTION CreateObject(cClassName, _args ) AS OBJECT CLIPPER
    // Creates an object by handing the complete argument list to CreateInstance().
    // Declared with the CLIPPER calling convention, so the number of arguments is
    // variable; cClassName is expected first, followed by the optional Init arguments.
    // The pseudo function _ARGS() returns the Clipper arguments array
    RETURN CreateInstance(_ARGS())

/// <include file="VFPDocs.xml" path="Runtimefunctions/createobjectex/*" />
FUNCTION CreateObjectEx(cClsIdOrcProgId, cComputerName , cIID ) AS OBJECT CLIPPER
    // Hands the complete argument list to CreateInstance(), exactly like CreateObject().
    // NOTE(review): cComputerName and cIID are therefore passed on to CreateInstance()
    // as additional arguments - confirm that this is the intended handling.
    // The pseudo function _ARGS() returns the Clipper arguments array
    RETURN CreateInstance(_ARGS())


// Start-up procedure (declared with the _INIT3 init level) that applies the
// settings used by the VFP dialect: the default RDD plus a set of runtime state values.
PROCEDURE RddInit() AS VOID _INIT3
    // Make sure that the VFP dialect has the DBFVFP driver as default RDD
    RddSetDefault("DBFVFP")
    // No collation name is set initially (empty string).
    RuntimeState.SetValue(Set.FoxCollate,"")
    // Initial values for the remaining runtime settings.
    RuntimeState.SetValue(Set.MemoWidth, 50)
    RuntimeState.SetValue(Set.Near, FALSE)
    RuntimeState.SetValue(Set.SqlAnsi, FALSE)
    RuntimeState.SetValue(Set.FoxLock, TRUE)
    RuntimeState.SetValue(Set.Eof, TRUE)
    RETURN 



FUNCTION SetFoxCollation(cCollation AS STRING) AS STRING
    // Changes the Set.FoxCollate runtime setting and returns the previous value.
    // cCollation is trimmed, upper-cased and compared (ordinal, ignoring case)
    // with the member names of the XSharp.FoxCollations enum.
    // Throws an argument Error when the name is not one of these members; a NULL
    // name now takes this path as well instead of failing on Trim().
    LOCAL cOld := RuntimeState.GetValue<STRING>(Set.FoxCollate) AS STRING
    LOCAL lOk := FALSE AS LOGIC
    LOCAL cValue := "" AS STRING
    IF ! String.IsNullOrEmpty(cCollation)
        cValue := cCollation:Trim():ToUpper()
        FOREACH VAR cEnum IN System.Enum.GetNames(typeof(XSharp.FoxCollations))
            IF String.Compare(cValue, cEnum, StringComparison.OrdinalIgnoreCase) == 0
                lOk := TRUE
                EXIT
            ENDIF
        NEXT
    ENDIF
    IF lOk
        RuntimeState.SetValue(Set.FoxCollate, cValue)
    ELSE
        LOCAL oError AS Error
        oError := Error.ArgumentError(__FUNCTION__, nameof(cCollation), 1, {cCollation})
        oError:Description := "Collating sequence '"+cCollation+"' is not found"
        oError:Throw()
    ENDIF
    RETURN cOld

function ICase(lCond1, uExp1, lCond2, uExp2) as usual
     LOCAL nCount := PCount() AS LONG
    // loop through the actual parameters. The odd parameters should be logic
    // the even parameters are return values for their siblings.
    for var nI := 1 to nCount-1 step 2
        local cond := _GetFParam(nI) as logic
        if cond
            return _GetFParam(nI+1)
        endif
    next
    // no conditions are met, if the # of parameters is odd then return the last value
    if nCount % 2 == 1
        return _GetFParam(nCount)
    endif
    // the # of parameters is even. When >= 2 then get the type of parameter 2 and return an empty value
    if PCount() >= 2
        var type := Usual

FoxPro function list updated

Posted: Thu Feb 13, 2020 5:07 pm
by mainhatten
Hi Robert,
robert wrote:- avoid using Left() and Right() to check for starting or ending with a certain character.
These functions create new strings and you don't want that. Use :EndsWith() on the string in stead
- avoid calling untyped functions such as Len(). For strings we can call the :Length property or the typed function SLen() (which does not crash when the string is NULL)
Full ACK on Starts/EndsWith, had started with :Length and should have thought of them
- there is no need to create locals lcFileName etc when the parameter is already declared properly
That typically gets done during further tightening up at the end; if a function/method changes value parameters, it is nice to see in the debugger where we started and to be able to run again from the very first method line
- instead of a function __SubStr() you can call string:Substring() with a 0 based offset.
Please check settings: in the package I downloaded, only Core, RDD and Generic have set <AZ>True (I am learning VS... less magic, more understanding). That was one of the first things I checked when project opened, I guessed that the package was envisioned "below" RT, therefore Arrays were worked on 1-based. Some of the funcs would fit into Core (not that Dir helper functions will cause lot of runtime....) but because they offer easy-to-read functionality for things found in most programs. I wanted code to be movable to RT, Core or snipped and used in part from userland, so avoided String methods too much - but SubString on purpose, as this would introduce errors whenever moved to "other" array base and start given according to something based on RT-Functions. __SubStr() at least is typed and partly undeterred, as both starts are supported.
- in the runtime code we prefer to use ":=" in stead of "=" to avoid confusion between comparisons and assignments and we also prefer to use the ":" send operator over the "." operator because that last one could also be interpreted as workarea.Field .
ACK and mostly done already. ACK also on benefit of less confusion, bad part is I cannot run identical snippets in vfp - so sometimes this might happen and fall under "tightening up at the end" Some worries on not using at least hungarian notation - vfp precedence for unaliased field names before memvars without m.Dot will separate code and "movability" more than necessary. While Hungarian is not entirely safe on vfp, it protects for more than 95% for code entered without MDot. I realize Runtime code is different, but think keeping Hungarian notation on vfp side is beneficial -- even if only to show good example for those peeking in the code from userland. Your call.

regards
Thomas

FoxPro function list updated

Posted: Tue Feb 25, 2020 9:18 pm
by mainhatten
Hi,
GetWordNum & GetWordCount implemented and working, but not as fast I hoped to get them.

Currently testing durations of different method on loooooong strings (see Tolstoy records from example),
have at least found 1 area to beat vfp speeds - despite working on 2-byte chars in Dotnet/xSharp.

When vfp gets the string into internal memory, it is wicked fast in the C-engine.
Next step will be to try to get a measure of how xSharp performs on millions of calls (feeding large tables of small strings to both), where I hope to offer a faster way by separating delimiter setting and actual string work by offering class methods.

The alines() functionality (sans creating array, if it does not exist) would fit right in, so I will add on that front as well, if not done already - something like __alines(taArray ref String[], .....) to hook into existing code.

That still open ?

regards
Thomas

FoxPro function list updated

Posted: Wed Feb 26, 2020 8:17 am
by Chris
Hi Thomas,

Can you please show us the code? Maybe we can have an idea or two by looking at it on how to improve performance.
Sorry, I did not understand what you meant about alines(), can you please explain?

FoxPro function list updated

Posted: Wed Feb 26, 2020 10:52 am
by mainhatten
Hi Chris,
Chris wrote:Can you please show us the code? Maybe we can have an idea or two by looking at it on how to improve performance.
Certainly - but work in progress, with code paths measured still included.
GetWord.ZIP
(5.18 KiB) Downloaded 64 times
In Attachment are 3 Files:
GetWord.Prg housing the Implementation via class, overload calling functions as in vfp, minimal glue function called in my test program and "alternative search for delimiters" options.
GetWrdTime has xSharp version of test&timing program>> needs to read the 4 records test database I uploaded in examples forum, not uploaded here again.
Tolstoy DBF is the table the timing results are written to. Paths are at the head of the test code, which is somewhat self-documenting as it logs the lcWhiteStr that gets eval()-ed into tcDelimiters and the Switch from SetDelimiter().

I have tested vfp on same table with 4 Tolstoy novels as test input first to get an idea how they probably implemented - variations on string length via table, variation on set of chars to be used as delimiters in program,, outcommenting lcWhiteStr, last one wins.
Timings indicate to me that Vfp seems to always implement some kind of binary search, as performance suffers with more delimiters, but not so drastically as to hint at a linear search. I expected vfp to benefit from checking 1-byte chars via pure C code after getting both strings into internal memory space vs. Dotnet unicode, but was surprised at the speed difference: on small tcDelimiters vfp was much faster, while with rather large sets dictionary access sees only minimal growth, whereas vfp binary(?) search grows faster.

First tries with implementing as functions resulted in lousy times, but created the skeleton of functions now implemented in class.

Next idea was to try out different structures for storing and accessing tcDelimiter.

Created delegates for the "StringCounting/WordGet" methods as well as a delegate for checking if "currentChar" is in the data structure currently tested.

Basic idea: split up functionality of setting up tcDelimiters in class, identify best access in 1 method, get count or specific word many times over calling specific method of class millions of times for each row of tables without incurring "setup time" for each row or field I have 1 task in vfp where I work on different field with different delimiters - here I could create GetWord-Classes in xSharp preloaded for specific delimiter and call on each field with correct delimiters already preloaded.

Other idea: checking for single delimiter is not much faster in vfp, but creating special "mode" for single char delimiter only might be much faster if compared directly, which turned out to be true and DOES happen often in my vfp code - when reading in already purged data fields single only delimiter is space(1).

Measuring showed that binary search was slower compared to dictionary even on small multi-Char delimiters, I had expected binary search to be faster for small ranges - single digit counts perhaps as the vfp default of [chr(32),chr(10),chr(9)]. Not so.

Then I tried out, if some "special" delimiters might be better handled directly in code, sidestepping DotNet data structures - see
IsVfpWhite (currently tested only for the first implementation), as byte pattern / order of possible comparisons is known in advance.

Yupp, is faster. Then decided to test if "delegating" as "IsDelimiter" incurs measurable cost, therefore implemented same Stringchecking calling the best "IsDelimiter" implementation directly and not via "delegate alias" - yupp, also faster.
This led to the default implementation in "Otherwise" of the switch in SetDelimiters.

Is not cleaned up, as I want to test on large# or rows next to streamline that approach. More effort than expected, but it was basic for some of my tasks in vfp and I want to have an implementation not much slower in xSharp ;-)

Sorry, I did not understand what you meant about alines(), can you please explain?

Array functions in vfp can create the array variables given as "Ref" parameters even if they don't exist when the function is called. This was mentioned in the forum in a discussion between Robert, Antonio and myself. I gave a tentative implementation via Try/Catch for situations where the resulting array has to be created, but that functionality will be needed by more vfp array functions, so it is perhaps better handled by the dev team.

I would implement Alines() when given an existing String[] as ref Parameter, leaving the variable checking needed for other array functions first alone. If still unclear, pls. ask.

regards
thomas

FoxPro function list updated

Posted: Wed Feb 26, 2020 11:15 am
by mainhatten
Chris wrote:Can you please show us the code?
BTW: In the test code from last post there are always warnings about longtxt not being defined, assumed to be Cursor which is fine.

Later on I receive second warning,
warning XS0165: Use of unassigned local variable 'lnCnt'

which is definitely false, as the first warning also happens on a line where lnCnt (declared local) is assigned a value.

Code: Select all

		lnCnt = getwordcount(longtxt.mtext,lcWhite,REF lnSwitch)
which then is probably seen as "not valid". Since it is only line assigning lnCnt...

regards
thomas

FoxPro function list updated

Posted: Fri Feb 28, 2020 8:26 pm
by Chris
Hi Thomas,

Apologies for the delay, I just had a good look into this. Very nice and very thorough work, great to see!

Some comments regarding speed: The usage of delegates instead of normal method calls does slow down things a bit, but not too much, something like 5-10%. If coding it this way suits you more, probably better keep it. Except maybe change only the IsInDelimiterArray logic, to call a standard method, because it is used a lot.

For small texts, what mainly slows down execution is that there's relatively a lot of overhead when calling the functions. Every time GetWordCount() is called, a GetWordHandler object is being created, which in turn creates other objects a dictionary etc. If you cache those objects, create them only once and reuse them in each next call to the function, you will get a great speed improvement (at least x2 or x3 with small strings).

With large texts though, this will not have a big impact and from what I see your core algorithm is very efficient already, could not spot another bottleneck. In your comparisons with VFP with very large tests, what's the speed difference to your routines?

FoxPro function list updated

Posted: Sun Mar 01, 2020 7:46 pm
by mainhatten
Hi Chris,

will answer to a few topics each with a separate post, as it is somewhat hectic here, and in different order.
Chris wrote:Apologies for the delay, I just had a good look into this. Very nice and very thorough work, great to see!
No apologies warranted - appreciate the time taken to look into it with more than a quick glance (and the compliment).
In your comparisons with VFP with very large tests, what's the speed difference to your routines?
Measurements were validated to have little variance (mostly turning all energy saving options off, Dotnet will show varying values otherwise whereas Vfp did not care), selected best run for each type.

Baseline = (100%) is vfp performance

If checking via Space(1) as delimiter:
Best xSharp ~78% (switch tnMethod 1/Default of !self:lSearchMany) [Yippee-ki-yay!]
other tnMethods of !self:lSearchMany (2..5) between 103..111%
via dict with single entry: 275..298% (ContainsKey called directly, via Delegate, via Function called as Delegate)
via .IndexOf: 282%
via Binary Search 340%

vfp Default delimiters: (Tab, LineFeed, Space(1)) 106% compared to pure Space(1)
next lines compared to vfp performance with same 3 chars, NOT to vfp with Space(1)!
handwritten IsVfpWhite (was a test balloon on a "range" idea, NOT really optimised) 115%, was called via Delegate
via Dict 220..233% (ContainsKey called directly, via Delegate, via Function called as Delegate)
via IndexOf ~285%
via BinarySearch ~350%

with 5 delimiters:
vfp takes 124% of time taken with Space(1)
next lines compared to vfp performance with same 5 chars, NOT to vfp with Space(1)!
via Dict 201..242% (ContainsKey called directly, via Delegate, via Function called as Delegate)
via IndexOf ~310%
via BinarySearch ~324%


all with longest WhiteStr as delimiter (38 chars long)
vfp takes 275% compared to pure Space(1)
next lines compared to vfp performance with same 38 chars, NOT to vfp with Space(1)!
via Dict 96..102% (ContainsKey called directly, via Delegate, via Function called as Delegate)
via IndexOf ~315%
via BinarySearch ~190%

tcDelimiter of 1 Char already possible to surpass vfp, a hunch coded quickly in IsVfpWhite nearly reaching vfp while encumbered with delegate call - better than expected after very first trials.
Embarrassing performance with dict and other multichar standard structures on tcDelimiters.Length around expected sizes (3..12), but I have not really tried whether IsVfpWhite can be made easier to digest for IL/CPU and have not really tried to find other facets of the "range" idea.

FoxPro function list updated

Posted: Sun Mar 01, 2020 10:25 pm
by mainhatten
Hi Chris,
Chris wrote:For small texts, what mainly slows down execution is that there's relatively a lot of overhead when calling the functions. Every time GetWordCount() is called, a GetWordHandler object is being created, which in turn creates other objects a dictionary etc. If you cache those objects, create them only once and reuse them in each next call to the function, you will get a great speed improvement (at least x2 or x3 with small strings)
don't worry - I am well aware that current code is somewhere between "abysmal" and "not really thought about" when looking at calling umpteen times with short strings.
Actually I am targeting even more than "just" caching those objects - the current focus on looong strings makes certain that lot of combinations in searched string which would need an extra test case are probably already handled AND make certain my algorithm to walk the string is sound even when encountering string lengths far to the right end of the Bell (or Poisson) distribution of probable lengths.
In reality most use cases follow the pattern of

Code: Select all

*-- Typical usage pattern: count the words once, then fetch each word by number.
*-- Square brackets mark the optional delimiter argument (documentation notation).
for lnRun := 1 to GetwordCount(lcString [, lcFixedDelimiters])
    *-- closing parenthesis of DoSomething( was missing
    = DoSomething(GetWordNum(lcString, lnRun[, lcFixedDelimiters]))
next
which is only for small N more efficient compared to alines(laLines, lcString,[lcList of lcDelimiter[]]), as GetWordNum on each call has to start counting on start of lcString, whereas Alines can walk the string once from start to end, slicing out next array element whenever encountering next delimiter-string. Even seasoned vfp coders often "standardize" on alines(), as performance will not suffer when encountering a string with many elements to slice - at the cost of somewhat slower, but predictable performance on strings with few Words. I had a use case of reading words of slightly more than a dozen short string table fields sporting most often 1..4 words, seldom more than 6. Buuuut: those tables came from Big Iron, had more than a few million rows. So use case was

Code: Select all

*-- Use case sketch: for every customer row the words of two name fields are handled,
*-- then, in a nested scan, the words of two address fields of the matching Adress rows.
scan for few_million_rows
    this.GetAllWords_and_handle_Name(CustNames->Name)
    this.GetAllWords_and_handle_Name(CustNames->Normalized_Name)
    select Adress
    *-- inner scan visits the Adress rows whose fk equals the current CustNames.pk
    scan for Adress.fk == CustNames.pk
        this.GetAllWords_and_handle_Adr(Adress->Adr)
        this.GetAllWords_and_handle_Adr(Adress->Normalized_Adr)
    endscan
    *--- for some other tables a similar 0..N scan, too unwieldy to put into 1 denormalized cursor
    *-- re-select CustNames before the outer scan continues
    select CustNames
endscan
where inside GetAllWords_and_handle the first code pattern exists. As performance option under xSharp was planned

Code: Select all

*-- Planned optimisation: create the handler and set its delimiter once, before the scan.
self:oGetWordHandler := GetWordHandler{}  && xSharp Core style
*-- a single space is the only delimiter in this sketch
= self:oGetWordHandler:SetDelimiter(Space(1))
scan for few_million_rows
    *--- same as above
endscan
but in this.GetAllWords_and_handle()

Code: Select all

*-- was planned: call the preloaded handler for counting and for extracting each word
for lnRun := 1 to self:oGetWordHandler:GetwordCount(tcString)
   *-- closing parenthesis of DoSomething( was missing
   = DoSomething(self:oGetWordHandler:GetWordNum(tcString, lnRun))
next
*-- in view of delegate performance, probably Inheritance-based
local loExecutor := self:oGetWordHandler:oActiveExecutor as GetWordExecutor
for lnRun := 1 to loExecutor:GetwordCount(tcString)
    *-- closing parenthesis of DoSomething( was missing here as well
    = DoSomething(loExecutor:GetWordNum(tcString, lnRun))
next
ONLY the pure counting and extracting are needed - which will be documented as performance enhancement option after porting in the docs.

But I plan to do 1.5 rounds of long string tests with further fine-tuning before switching over to table test to optimize calling chain. .5 being the rewrite from delegates to a few traditional methods sometimes overwritten.

In table test 3 measures are planned:
call as implemented now in calls via existing vfp functions
cached GetWordHandler to reuse in calls via existing vfp functions
calling the optimized pattern above.

As subject of "caching" is already discussed: in vfp I would do my best to not add a "serviceobject" into vfp-public (==global or similar to "private in topmost function") namespace, but try to tuck it away perhaps in a singleton attached to _screen or goApp. in xSharp I am somewhat hazy on how/where to put such a cache.

Code: Select all

   FUNCTION GetWordCount( tcString AS STRING, tcDelimiters AS STRING, tnSwitch ref Int) AS LONG
        // Uncached variant: a new GetWordHandler is created for every call, the
        // delimiters are set on it (tnSwitch is passed by reference) and the word
        // count of tcString is returned.
        // Author's note: checked, throws on .Null.
        local loHandler := GetWordHandler {} as GetWordHandler
        loHandler:SetDelimiter(tcDelimiters, ref tnSwitch)
        RETURN loHandler:GetWordCount(tcString)
    *-- based on current implementation, not pie-in-the-sky from above
    // Single handler instance shared by every call of GetWordCount2.
    Global __goGetWordHandler := GetWordHandler {} as GetWordHandler
    FUNCTION GetWordCount2( tcString AS STRING, tcDelimiters AS STRING, tnSwitch ref Int) AS LONG
        // Cached variant: reuses the global handler instead of creating a new one.
        // The delimiters are set again on every call (tnSwitch is passed by reference).
        // NOTE(review): every call changes the shared handler, so concurrent callers
        // presumably interfere with each other - confirm before using from several threads.
        // Author's note: checked, throws on .Null.
        __goGetWordHandler:SetDelimiter(tcDelimiters, ref tnSwitch)
        RETURN __goGetWordHandler:GetWordCount(tcString)
at least compiles without errors, [not even run once, as I add more IsInDelimiters() to test], but I fear that __goGetWordHandler is "visible" just like functions defined in vfp namespace - nothing to strive for. Any hint where to put the cache - away from usercode in the runtime ? Where to look in sources ? [Hints from all other core devs very welcome]

Second fear is that code similar to the above is not threadsafe - no problem coming from vfp, which needs heavy lifting to enable multitasking/-threading code, but as C# aims for parallel execution in Threading, delegates calling cached __goGetWordHandler via vfp standard functions could be in trouble. Slow uncached version probably is thread safe, as they create each their own GetWordHandler{} in each call, but here as well creating a GetWordHandler{} inside parallel delegate is probably best practice.

Comments?

regards
thomas