I follow all the XBase languages, just to get an idea of what is happening in the XBase world. Although there are certain syntax differences between what we might call the Clipper and FoxBase forks, they all still have the dBase programming language as their basis.
Obviously the VFP language pack is not yet available and the DBF drivers are not yet complete; both, once ready, will greatly enhance a "Transport".
I saw an example on the ProFox forum (https://leafe.com/dls/vfp) to validate if a TextFile is UTF8 compliant and thought it might be a good example to try and convert with the least amount of changes to be X# compiled.
I decided after reviewing the code it should not be too difficult and set myself the following mandate:
- Touch the original ValidateUTF() code as little as possible, except syntax differences that can be done with a Replace all e.g. "&&" to "//&&" etc.
- Write from scratch any VFP function called in the example that is not found by the XSharp compiler, even if it exists under a different name in the XSharp Runtime functions, e.g. SUBSTRC is available as SUBSTR, SUBSTR2, SUBSTR3. Wrap the features already available in .NET; there are millions!
[*]
[*]Add FUNCTION Start() around test code.
[*]Replace "&&" with "//&&", "m." with "", "=" with ":=", " OR " with " .OR. ", " AND " with " .AND. "
[*]Replace ENDFUNC with RETURN where necessary
[*]Scan through the code and change ":=" in conditional statements (WHILE, IF, CASE) back to "=". Assignment in X# is done with ":=", "+=", "++" etc., while "=" and "==" are comparison operators.
[*]Compile, respond to errors "No BITAND, BITOR, BITLSHIFT, FILETOSTR, ASC, SUBSTRC, LENC functions", create functions
[*]Strictly type variables: add "AS <Type>" to LOCAL declarations and FUNCTION parameters. This is not necessary, but the speed improvement is HUGE compared with leaving variables untyped, e.g. LOCAL x translates to LOCAL x AS USUAL, whereas LOCAL x AS INT is strictly typed and VAR x := 0 is inferred by XSharp as an INT.
[*]Compile successfully
[*]Test successful. 100% XSharp compiled and executed with correct results!
Here is the resulting code, original code can be found at the link provided above if somebody is interested:
Code: Select all
/*
************************************************************************
*** UTF-8 string check validity function
*** Version 1.0 - 12-20-2016
************************************************************************
*** José Enrique Llopis
*** jellopis@rocketmail.com www.multilinkcrm.com
*** My online résumé https://es.linkedin.com/in/pepellopis
*** Alicante – Spain
************************************************************************/
FUNCTION Start( ) AS VOID // The entry point to all X# applications
    // Loads three sample files and prints the ValidateUtf8() verdict for each.
    LOCAL lcString AS STRING

    lcString := FILETOSTR( "Spanish_Text.txt")
    ? ValidateUtf8( lcString )

    // ":=" is the assignment operator; a bare "=" is a comparison operator.
    lcString := FILETOSTR( "good.txt") // *** Russian cyrillic utf8 text
    ? ValidateUtf8( lcString )

    lcString := FILETOSTR( "bad.txt")
    ? ValidateUtf8( lcString )
    RETURN
// Checks whether tcBuffer holds a well-formed UTF-8 byte sequence.
//
// tcBuffer : the data to check. Every character is read through Asc(), so
//            each character is expected to carry exactly one byte (0..255).
// Returns  : .T. when every sequence is complete, uses the shortest possible
//            encoding (no overlong forms) and decodes to a legal code point;
//            .F. on the first violation found.
FUNCTION ValidateUtf8( tcBuffer AS STRING ) AS LOGIC
    LOCAL lnBufferLen, lnCounter, lnCodepointValue AS INT
    LOCAL lnLead, lnCont1, lnCont2, lnCont3 AS INT

    lnBufferLen := LENC( tcBuffer )
    lnCounter   := 1

    DO WHILE lnCounter <= lnBufferLen
        lnLead := Asc( SUBSTRC( tcBuffer, lnCounter, 1 ) )

        DO CASE
        CASE lnLead < 128                            //&& Single byte codepoint
            lnCounter += 1

        CASE BITAND( lnLead, 0xE0 ) = 0xC0           //&& Two bytes codepoint
            IF lnCounter + 1 > lnBufferLen
                RETURN .F.
            ENDIF
            //&& Check for overlong form (8th or above data bit must be set)
            IF BITAND( lnLead, 0x1E ) = 0
                RETURN .F.
            ENDIF
            //&& Check continuation byte
            lnCont1 := Asc( SUBSTRC( tcBuffer, lnCounter + 1, 1 ) )
            IF BITAND( lnCont1, 0xC0 ) <> 0x80
                RETURN .F.
            ENDIF
            //&& No code point validity check needed: the value cannot be
            //&& large enough to be one of the invalid ones.
            lnCounter += 2

        CASE BITAND( lnLead, 0xF0 ) = 0xE0           //&& Three bytes codepoint
            IF lnCounter + 2 > lnBufferLen
                RETURN .F.
            ENDIF
            //&& Check continuation bytes
            lnCont1 := Asc( SUBSTRC( tcBuffer, lnCounter + 1, 1 ) )
            lnCont2 := Asc( SUBSTRC( tcBuffer, lnCounter + 2, 1 ) )
            IF ( BITAND( lnCont1, 0xC0 ) <> 0x80 ) .OR. ;
               ( BITAND( lnCont2, 0xC0 ) <> 0x80 )
                RETURN .F.
            ENDIF
            //&& Convert to code point: 4 + 6 + 6 payload bits
            lnCodepointValue := BITOR( ;
                BITLSHIFT( BITAND( lnLead, 0x0F ), 12 ), ;
                BITLSHIFT( BITAND( lnCont1, 0x3F ), 6 ), ;
                BITAND( lnCont2, 0x3F ) ;
                )
            //&& Check for overlong form (12th or above data bit must be set):
            //&& anything below U+0800 fits in one or two bytes.
            IF lnCodepointValue < BITLSHIFT( 1, 11 )
                RETURN .F.
            ENDIF
            //&& Check code point legality.
            IF ! isValidCodePoint( lnCodepointValue )
                RETURN .F.
            ENDIF
            lnCounter += 3

        CASE BITAND( lnLead, 0xF8 ) = 0xF0           //&& Four bytes codepoint
            IF lnCounter + 3 > lnBufferLen
                RETURN .F.
            ENDIF
            //&& Check continuation bytes
            lnCont1 := Asc( SUBSTRC( tcBuffer, lnCounter + 1, 1 ) )
            lnCont2 := Asc( SUBSTRC( tcBuffer, lnCounter + 2, 1 ) )
            lnCont3 := Asc( SUBSTRC( tcBuffer, lnCounter + 3, 1 ) )
            IF ( BITAND( lnCont1, 0xC0 ) <> 0x80 ) .OR. ;
               ( BITAND( lnCont2, 0xC0 ) <> 0x80 ) .OR. ;
               ( BITAND( lnCont3, 0xC0 ) <> 0x80 )
                RETURN .F.
            ENDIF
            //&& Convert to code point: 3 + 6 + 6 + 6 payload bits.
            //&& The second continuation byte is shifted by 6 (not 12), so it
            //&& does not overlap the bits of the first continuation byte.
            lnCodepointValue := BITOR( ;
                BITLSHIFT( BITAND( lnLead, 0x07 ), 18 ), ;
                BITLSHIFT( BITAND( lnCont1, 0x3F ), 12 ), ;
                BITLSHIFT( BITAND( lnCont2, 0x3F ), 6 ), ;
                BITAND( lnCont3, 0x3F ) ;
                )
            //&& Check for overlong form (17th or above data bit must be set):
            //&& anything below U+10000 fits in three bytes or fewer.
            IF lnCodepointValue < BITLSHIFT( 1, 16 )
                RETURN .F.
            ENDIF
            //&& Check code point legality.
            IF ! isValidCodePoint( lnCodepointValue )
                RETURN .F.
            ENDIF
            lnCounter += 4

        OTHERWISE                                    //&& Invalid length, or not a start byte
            RETURN .F.
        ENDCASE
    ENDDO
    RETURN .T.
// Tells whether tnCodePoint may legally appear in encoded text:
// it must not lie in the surrogate range 0xD800..0xDFFF and must not
// exceed the maximum value 0x10FFFF.
FUNCTION isValidCodePoint( tnCodePoint AS INT) AS LOGIC
    // Surrogates are rejected up front.
    IF tnCodePoint >= 0xD800 .AND. tnCodePoint <= 0xDFFF
        RETURN FALSE
    ENDIF
    // Everything else is legal up to the maximum value.
    RETURN tnCodePoint <= 0x10FFFF
// VFP functions not in X# Runtime Funcs or with alternative name
// Loads a whole file into a string, one character per byte of the file.
//
// name    : path of the file to read.
// Returns : a string whose length equals the file size; character i carries
//           the value (0..255) of byte i.
//
// The bytes are mapped through ISO-8859-1 instead of being decoded as text:
// a text decoder would merge multi-byte sequences into single characters,
// and code that inspects the content byte by byte (via ASC) would then no
// longer see the bytes that are actually stored in the file.
FUNCTION FILETOSTR(name AS STRING) AS STRING
    // ReadAllBytes opens, reads and closes the file in a single call.
    VAR aBytes  := System.IO.File.ReadAllBytes(name)
    VAR oLatin1 := System.Text.Encoding.GetEncoding("iso-8859-1")
    RETURN oLatin1:GetString(aBytes)
// Returns up to iCount characters of s, starting at the 1-based position iOffSet.
// A start position beyond the end of s yields "", and a count that runs past
// the end is clamped to the characters that are available, instead of letting
// String.Substring throw.
// NOTE(review): clamping assumes VFP's SUBSTRC behaves this way - confirm against the VFP docs.
FUNCTION SUBSTRC(s AS STRING, iOffSet AS INT, iCount AS INT) AS STRING
    LOCAL iStart AS INT
    // Note .NET Strings are Zero indexed
    iStart := iOffSet - 1
    IF iStart >= s:Length
        RETURN ""
    ENDIF
    RETURN s:Substring(iStart, System.Math.Min(iCount, s:Length - iStart))
// Returns the tail of s starting at the 1-based position iOffSet.
// A start position beyond the end of s yields "" instead of letting
// String.Substring throw.
// NOTE(review): returning "" assumes VFP's SUBSTRC behaves this way - confirm against the VFP docs.
FUNCTION SUBSTRC(s AS STRING, iOffSet AS INT) AS STRING
    LOCAL iStart AS INT
    // Note .NET Strings are Zero indexed
    iStart := iOffSet - 1
    IF iStart >= s:Length
        RETURN ""
    ENDIF
    RETURN s:Substring(iStart)
// Number of characters in s, taken from the string's Length property.
FUNCTION LENC(s AS STRING) AS INT
    LOCAL nChars AS INT
    nChars := s:Length
    RETURN nChars
// Numeric value of the character at index 0 of s, narrowed to a single byte
// before being widened to INT.
FUNCTION ASC(s AS STRING) AS INT
    LOCAL bCode AS BYTE
    // Strings are arrays of Char; the Char is cast to Byte first ...
    bCode := (BYTE) s[0]
    // ... and the Byte is then cast to Int.
    RETURN (INT) bCode
// Bitwise AND of the two integers, computed with _And().
FUNCTION BITAND(iV1 AS INT, iV2 AS INT) AS INT
    LOCAL iMasked AS INT
    iMasked := _And(iV1, iV2)
    RETURN iMasked
//FUNCTION BITAND(iV1 AS INT, iV2 AS INT, iV3 AS INT) AS INT
//RETURN iV1 & iV2 & iV3 // The short for unary AND operations.
// Bitwise OR of two integers.
FUNCTION BITOR(iV1 AS INT, iV2 AS INT) AS INT
    LOCAL iCombined AS INT
    iCombined := iV1 | iV2
    RETURN iCombined
// Three parameter overload of BITOR: bitwise OR of all three integers.
FUNCTION BITOR(iV1 AS INT, iV2 AS INT, iV3 AS INT) AS INT
    LOCAL iCombined AS INT
    // Fold the values in one at a time.
    iCombined := _Or(iV1, iV2)
    iCombined := _Or(iCombined, iV3)
    RETURN iCombined
// Four parameter overload of BITOR: bitwise OR of all four integers.
FUNCTION BITOR(iV1 AS INT, iV2 AS INT, iV3 AS INT, iV4 AS INT) AS INT
    LOCAL iFirstPair, iSecondPair AS INT
    // Combine the values pairwise, then merge the two halves.
    iFirstPair  := _Or(iV1, iV2)
    iSecondPair := _Or(iV3, iV4)
    RETURN _Or(iFirstPair, iSecondPair)
// Shifts iValue to the left by iShift bit positions.
FUNCTION BITLSHIFT(iValue AS INT, iShift AS INT) AS INT
    LOCAL iShifted AS INT
    iShifted := iValue << iShift
    RETURN iShifted
//FUNCTION BITRSHIFT(iValue AS INT, iShift AS INT) AS INT
//RETURN