A quick article on which methods you could use in AutoHotkey to separate out the HTML tables of a given webpage into a ListView.
Why?
I want a snippet of code that replicates any HTML table.
How?
I've been trying various ways, so I'm posting them here. My opinions on them change with the weather, so until I do some benchmarking I won't know which one's best:
Method #1
This method replicates the table in a listview format:
copyraw
	
; Splits the table HTML held in ReturnedHTMLTable into rows, splits each row into
; cells, and adds one ListView row per data cell: "<first cell> (<heading>)", "<cell value>".
; Assumes a two-column ListView is already the default ListView - TODO confirm against the GUI code.
;
; NOTE(review): the published listing had lost every "<...>" sequence (the StringReplace
; search text and the RegExReplace pattern were empty, and the If lines had no operator).
; The tokens marked "reconstructed" below are best guesses - confirm them against the
; HTML actually being parsed (e.g. heading rows that use </th> instead of </td>).

; Reconstructed: turn every row terminator into "|" so the rows can be split.
StringReplace,ReturnedHTMLTableRows,ReturnedHTMLTable,</tr>,|,A
; Legacy self-assignment: trims leading/trailing whitespace while AutoTrim is on.
ReturnedHTMLTableRows=%ReturnedHTMLTableRows%
StringSplit,ReturnedHTMLTableRows,ReturnedHTMLTableRows,|
RowIndex:=0
Loop, %ReturnedHTMLTableRows0%
{
    ColIndex:=1
    RowIndex++
    ReturnedHTMLTableColString:=ReturnedHTMLTableRows%RowIndex%
    ; Row 1 has been fully processed by now, so its collected headings can be split.
    If RowIndex=2
        StringSplit,HeadingsArray,HeadingsArrayString,|
    ; Reconstructed: turn every cell terminator into "|" so the cells can be split.
    StringReplace,ReturnedHTMLTableColString,ReturnedHTMLTableColString,</td>,|,A
    ReturnedHTMLTableColString=%ReturnedHTMLTableColString%
    StringSplit,ReturnedHTMLTableCols,ReturnedHTMLTableColString,|
    Loop, %ReturnedHTMLTableCols0%
    {
        ; Reconstructed: strip any remaining HTML tags from the cell.
        ThisValue := RegExReplace( ReturnedHTMLTableCols%ColIndex%, "<[^>]+>" , "")
        ; NOTE(review): HeadingsArrayString begins with "|", so HeadingsArray1 is blank and
        ; the heading of column N appears to sit at index N+1; the +2 offset is kept as
        ; published - verify which heading is intended.
        HeadingIndex := ColIndex + 2
        TypeLabel := HeadingsArray%HeadingIndex%
        ; The first row supplies the headings.
        If RowIndex=1
            HeadingsArrayString:=HeadingsArrayString "|" ThisValue
        If ColIndex=1
        {
            ; The first cell of a row labels every other cell of that row.
            ThisLabel := ThisValue
        }
        Else If ( RowIndex != 1 && ColIndex != ReturnedHTMLTableCols0 )
        {
            ; Reconstructed "<>" comparisons: skip the heading row and the last split
            ; element (the remainder after the final cell terminator).
            LV_Add("", ThisLabel " (" TypeLabel ")" , ThisValue )
        }
        ColIndex++
    }
}
	- StringReplace,ReturnedHTMLTableRows,ReturnedHTMLTable,,|,A
- ReturnedHTMLTableRows=%ReturnedHTMLTableRows%
- StringSplit,ReturnedHTMLTableRows,ReturnedHTMLTableRows,|
- RowIndex:=0
- Loop, %ReturnedHTMLTableRows0%
- {
- ColIndex:=1
- RowIndex++
- ReturnedHTMLTableColString:=ReturnedHTMLTableRows%RowIndex%
- If RowIndex=2
- StringSplit,HeadingsArray,HeadingsArrayString,|
- StringReplace,ReturnedHTMLTableColString,ReturnedHTMLTableColString,,|,A
- ReturnedHTMLTableColString=%ReturnedHTMLTableColString%
- StringSplit,ReturnedHTMLTableCols,ReturnedHTMLTableColString,|
- Loop, %ReturnedHTMLTableCols0%
- {
- ThisValue := RegExReplace( ReturnedHTMLTableCols%ColIndex%, "" , "")
- HeadingIndex := ColIndex + 2
- TypeLabel := HeadingsArray%HeadingIndex%
- If RowIndex=1
- HeadingsArrayString:=HeadingsArrayString "|" ThisValue
- If ColIndex=1
- ThisLabel := ThisValue
- Else
- If RowIndex1
- If ColIndex%ReturnedHTMLTableCols0%
- LV_Add("", ThisLabel " (" TypeLabel ")" , ThisValue )
- ColIndex++
- }
- }
Method #2
This method only uses two columns with the label being a concatenation of the value of the first column and the column heading.
copyraw
	
; ExtractText: returns the text that lies between two delimiters inside Haystack.
;   Haystack     - the string to search.
;   Needle1a     - string that locates the region of interest, searched from NeedleMarker.
;   Needle1b     - string searched from the Needle1a match; the extract starts right after it.
;   Needle2a     - string that ends the extract, searched from the start of the extract.
;   NeedleMarker - 1-based position at which to begin searching. Blank or below 1 is
;                  treated as 1. Defaults to 1.
; Returns [ThisValue, NeedleMarker]: the extract with surrounding spaces/tabs trimmed, and
; the position of the Needle2a match (pass it back in to find the next occurrence).
; Returns ["", 0] when any of the three needles is not found, so check for a marker of 0
; before calling again.
ExtractText( Haystack, Needle1a, Needle1b, Needle2a, NeedleMarker := 1 ){
    ; InStr does not search forward from a StartingPos below 1, so never pass one through.
    If ( NeedleMarker = "" || NeedleMarker < 1 )
        NeedleMarker := 1
    Needle1 := InStr( Haystack, Needle1a, false, NeedleMarker )
    If ( !Needle1 )
        Return ["", 0]
    Needle1 := InStr( Haystack, Needle1b, false, Needle1 )
    If ( !Needle1 )
        Return ["", 0]
    ; Step past Needle1b so the extract starts immediately after it.
    Needle1 += StrLen( Needle1b )
    Needle2 := InStr( Haystack, Needle2a, false, Needle1 )
    If ( !Needle2 )
        Return ["", 0]
    ; Trim() instead of the legacy self-assignment, so trimming does not depend on AutoTrim.
    ThisValue := Trim( SubStr( Haystack, Needle1, Needle2 - Needle1 ) )
    Return [ThisValue, Needle2]
}
; Usage
        ; get table HTML
        ; (start with NeedleMarker at 1; a returned marker of 0 means nothing was found)
        ReturnedValues := ExtractText( Haystack, Needle1a, Needle1b, Needle2a, NeedleMarker )
        TheHTMLTable := ReturnedValues[1]
        NeedleMarker := ReturnedValues[2]
	- ExtractText( Haystack, Needle1a, Needle1b, Needle2a, NeedleMarker ){
- Needle1 := InStr( Haystack, Needle1a, false, NeedleMarker )
- Needle1 := InStr( Haystack, Needle1b, false, Needle1 ) + StrLen( Needle1b )
- Needle2 := InStr( Haystack, Needle2a, false, Needle1 )
- NeedleLen := Needle2 - Needle1
- NeedleMarker := Needle2
- ThisValue := SubStr( Haystack, Needle1, NeedleLen )
- ThisValue=%ThisValue%
- Return [ThisValue, NeedleMarker]
- }
- ; Usage
- ; get table HTML
- ReturnedValues := ExtractText( Haystack, Needle1a, Needle1b, Needle2a, NeedleMarker )
- TheHTMLTable := ReturnedValues[1]
- NeedleMarker := ReturnedValues[2]
Snippets
Assume that "Haystack" is the string of code you want to parse (all content).
The function ExtractText extracts text given a unique string that marks the start of the extract (1a), a second string (1b) that refines the starting position of the extract, and a third string (2a) that specifies the closing position of the extract. NeedleMarker is the search offset: when the function is used for several tables, NeedleMarker tells it to start from where it last found a table and to find the next one:
    ; ExtractText: returns the text that lies between two delimiters inside Haystack.
    ;   Haystack     - the string to search.
    ;   Needle1a     - string that locates the region of interest, searched from NeedleMarker.
    ;   Needle1b     - string searched from the Needle1a match; the extract starts right after it.
    ;   Needle2a     - string that ends the extract, searched from the start of the extract.
    ;   NeedleMarker - 1-based position at which to begin searching. Blank or below 1 is
    ;                  treated as 1. Defaults to 1.
    ; Returns [ThisValue, NeedleMarker]: the extract with surrounding spaces/tabs trimmed, and
    ; the position of the Needle2a match (pass it back in to find the next occurrence).
    ; Returns ["", 0] when any of the three needles is not found, so check for a marker of 0
    ; before calling again.
    ExtractText( Haystack, Needle1a, Needle1b, Needle2a, NeedleMarker := 1 ){
        ; InStr does not search forward from a StartingPos below 1, so never pass one through.
        If ( NeedleMarker = "" || NeedleMarker < 1 )
            NeedleMarker := 1
        Needle1 := InStr( Haystack, Needle1a, false, NeedleMarker )
        If ( !Needle1 )
            Return ["", 0]
        Needle1 := InStr( Haystack, Needle1b, false, Needle1 )
        If ( !Needle1 )
            Return ["", 0]
        ; Step past Needle1b so the extract starts immediately after it.
        Needle1 += StrLen( Needle1b )
        Needle2 := InStr( Haystack, Needle2a, false, Needle1 )
        If ( !Needle2 )
            Return ["", 0]
        ; Trim() instead of the legacy self-assignment, so trimming does not depend on AutoTrim.
        ThisValue := Trim( SubStr( Haystack, Needle1, Needle2 - Needle1 ) )
        Return [ThisValue, Needle2]
    }
; Usage
        ; get table HTML
        ; (start with NeedleMarker at 1; a returned marker of 0 means nothing was found)
        ReturnedValues := ExtractText( Haystack, Needle1a, Needle1b, Needle2a, NeedleMarker )
        TheHTMLTable := ReturnedValues[1]
        NeedleMarker := ReturnedValues[2]
Category: Hypertext Markup Language :: Article: 496
	

 
						  
                 
						  
                 
						  
                 
						  
                 
						  
                 
 
 

 
 
Add comment