Script 8

Presentation Notes

Well, now we'll need to move to an actual script editor to work with the code, although I'll still save it here.

I'll post the details of this presentation at http://www.danshockley.com/form_processing.php.

Script Source

-- Form Processing

-- the example here is a search on PetFinder.com

-- version 1.0

-- Script-level configuration used by the run handler:
--   searchBase / searchTerms are passed to curlSimpleDownload() as the URL and the form data;
--   the before*/after* properties are the marker strings handed to getTextBetween() and
--   getTextBetweenMultiple() to cut the results block, each result line, each field and
--   each link out of the downloaded source;
--   blankRecord is the template record for one result.
-- NOTE(review): several of the string literals below look damaged -- it appears that markup
-- inside the strings was stripped when this listing was published (TODO confirm against the
-- original script):
--   * afterResultsList contains nothing but line breaks
--   * beforeEachResultLine/afterEachResultsLine, beforeOneField/afterOneField and
--     beforeLink/afterLink are each fused onto a single line with unbalanced quotes
--   * afterLinkText is an empty string
-- As written these declarations will presumably not compile; the original marker values
-- need to be restored before the script can run.

property searchBase : "http://www.petfinder.com/pet.cgi"

property searchTerms : "action=1&pet.Animal=Dog&pet.Breed=malamute&pet.Age=Adult&pet.Size=&pet.Sex=M&location=NJ"

property beforeResultsList : "Organization"

property afterResultsList : "

"

property beforeEachResultLine : "property afterEachResultsLine : ""

property blankRecord : {orgLink:"", detailLink:"", organization:"", dominantBreed:"", age:"", sex:"", petName:"", pictureLink:"", features:""}

property beforeOneField : "property afterOneField : ""

property baseURLForLinks : "http://petfinder.com/"

property beforeLink : "property afterLink : "\">"

property afterLinkText : ""

on run
	-- Downloads the search results source, cuts out the results block, splits it into
	-- result lines and fields, and builds one record per result line.
	-- Returns: a list of records shaped like blankRecord (only organization and petName
	-- are filled in by this version).
	
	-- get the source as a string
	set sourceHTML to my curlSimpleDownload(searchBase, "", searchTerms)
	set resultsBlock to my getTextBetween(sourceHTML, beforeResultsList, afterResultsList)
	set resultsList to my getTextBetweenMultiple(resultsBlock, beforeEachResultLine, afterEachResultsLine)
	
	set petRecordList to {}
	
	repeat with oneRawResult in resultsList
		-- copy, not set: "set" would make onePetRecord refer to the very same record as the
		-- blankRecord property, so the assignments below would overwrite the template
		copy blankRecord to onePetRecord
		
		set rawFields to my getTextBetweenMultiple(oneRawResult, beforeOneField, afterOneField)
		
		-- organization is read from field 1 and the pet name from field 5; a line with
		-- fewer fields is skipped instead of failing on "item 5 of rawFields"
		if (count of rawFields) >= 5 then
			set relativeOrgLink to my getTextBetween(item 1 of rawFields, beforeLink, afterLink)
			--set orgLink of onePetRecord to baseURLForLinks & relativeOrgLink
			-- the link text starts right after the link target and its closing marker
			set organization of onePetRecord to my getTextBetween(item 1 of rawFields, (relativeOrgLink & afterLink) as string, afterLinkText)
			
			set relativeDetailLink to my getTextBetween(item 5 of rawFields, beforeLink, afterLink)
			--set detailLink of onePetRecord to baseURLForLinks & relativeDetailLink
			set petName of onePetRecord to my getTextBetween(item 5 of rawFields, (relativeDetailLink & afterLink) as string, afterLinkText)
			
			copy onePetRecord to end of petRecordList
		end if
	end repeat
	
	petRecordList
end run

on curlSimpleDownload(downloadURL, destExpected, theFormInfo)
	-- version 1.1, Daniel A. Shockley - public domain
	-- Runs curl through "do shell script" to fetch downloadURL.
	--
	-- downloadURL is STRING
	-- saves to destExpected (Mac path as STRING, FILE SPEC, or ALIAS), if given
	-- if destExpected is "", returns source result directly as string
	-- optional form data for POST - use "" for no form data
	--
	-- Returns: when a destination was given, whatever curl writes for %{http_code}
	-- (the http success code "200", or an error code); when no destination was given,
	-- the downloaded source, and no file is saved.
	-- Errors: any failure is re-raised with the prefix "curlSimpleDownload FAILED: "
	-- and the original error number.
	try
		-- basic download to standard output
		-- "quoted form of" keeps the shell from interpreting $, backticks, quotes or
		-- backslashes that may occur in the URL or the form data
		set curlCode to "curl " & quoted form of (downloadURL as string)
		
		if (length of theFormInfo) > 0 then
			set curlCode to curlCode & " -d " & quoted form of (theFormInfo as string)
		end if
		
		-- now, add on the desired file location, if there is one given
		if destExpected is not "" then
			set unixDestExpected to quoted form of POSIX path of (destExpected as string)
			set curlCode to curlCode & " --output " & unixDestExpected & " --write-out \"%{http_code}\""
		else -- result as string
			-- pipe through vis so that non-printable characters come back in a printable encoding
			set curlCode to curlCode & " | vis"
		end if
		
		set curlResponse to do shell script curlCode
		return curlResponse
	on error errMsg number errNum
		error "curlSimpleDownload FAILED: " & errMsg number errNum
	end try
end curlSimpleDownload

on simpleReplace(thisText, oldChars, newChars)
	-- version 1.1
	-- Replaces every occurrence of oldChars in thisText with newChars and returns the new text.
	-- 1.1 coerces the newChars to a STRING, since other data types do not always coerce
	-- (example, replacing "nine" with 9 as number replaces with "")
	--
	-- AppleScript's text item delimiters are global state: they are saved up front and put
	-- back on both the normal and the error path, so a failure in here cannot leave the
	-- caller running with oldChars/newChars as its delimiters.
	set oldDelims to AppleScript's text item delimiters
	try
		-- split on the text to remove ...
		set AppleScript's text item delimiters to the oldChars
		set the parsedList to every text item of thisText
		-- ... and join the pieces again with the replacement
		set AppleScript's text item delimiters to the {(newChars as string)}
		set the newText to the parsedList as string
		set AppleScript's text item delimiters to oldDelims
		return newText
	on error errMsg number errNum
		set AppleScript's text item delimiters to oldDelims
		error errMsg number errNum
	end try
end simpleReplace

on getTextBetween(sourceText, beforeText, afterText)
	-- version 1.1
	-- gets the text between the first occurrences of beforeText and afterText in sourceText
	--
	-- Returns "" when the lookup fails (for example when beforeText does not occur, so
	-- there is no second text item). When afterText does not occur in the remainder,
	-- everything up to the next occurrence of beforeText (or the end of sourceText)
	-- is returned.
	--
	-- The caller's text item delimiters are saved before the try block so that the
	-- error path can restore them too, instead of forcing them to {""}.
	set oldDelims to AppleScript's text item delimiters
	try
		-- text item 2 is whatever follows the first beforeText
		set AppleScript's text item delimiters to the beforeText
		set the prefixRemoved to text item 2 of sourceText
		-- text item 1 of that is whatever precedes the first afterText
		set AppleScript's text item delimiters to afterText
		set the finalResult to text item 1 of prefixRemoved
		set AppleScript's text item delimiters to oldDelims
		return finalResult
	on error errMsg number errNum
		set AppleScript's text item delimiters to oldDelims
		return "" -- return nothing if the surrounding text is not found
	end try
end getTextBetween

on parseChars(thisText, parseString)
	-- version 1.1
	-- Splits thisText at every occurrence of parseString (coerced to a string) and
	-- returns the pieces as a list of text items.
	-- Errors: any failure is re-raised with the prefix "ERROR: parseChars() handler: "
	-- and the original error number.
	--
	-- The caller's text item delimiters are saved before the try block and restored on
	-- the error path as well, so a failed split does not leave parseString installed
	-- as the global delimiter.
	set oldDelims to AppleScript's text item delimiters
	try
		set AppleScript's text item delimiters to the {parseString as string}
		set the parsedList to every text item of thisText
		set AppleScript's text item delimiters to oldDelims
		return parsedList
	on error errMsg number errNum
		set AppleScript's text item delimiters to oldDelims
		error "ERROR: parseChars() handler: " & errMsg number errNum
	end try
end parseChars

on testPathExists(inputPath)
	-- version 1.4
	-- from Richard Morton, on applescript-users@lists.apple.com
	-- public domain, of course. :-)
	-- gets somewhat slower as nested-depth level goes over 10 nested folders
	--
	-- Returns true when inputPath can be resolved as an alias, false when it is ""
	-- or when resolving it raises an error.
	
	-- an empty path is never looked up
	if inputPath is equal to "" then return false
	
	try
		get alias inputPath as string
		return true
	on error
		-- the alias could not be resolved
		return false
	end try
end testPathExists

on getTextBetweenMultiple(sourceText, beforeText, afterText)
	-- version 1.1
	-- Collects the text found between each occurrence of beforeText and the afterText
	-- that follows it in sourceText; the result is a list of strings.
	-- A section that follows a beforeText but contains no afterText is left out.
	-- NEEDs parseChars()
	
	--EXAMPLE USE:
	--set someNames to "name: Bob, birth: 7/23/1978; name: Dan, birth: 3/12/1975; name: Jeff, birth: 4/6/1976"
	--set nameList to my getTextBetweenMultiple(someNames, "name: ", ", birth")
	
	try
		set allSections to my parseChars(sourceText, beforeText)
		
		-- a single piece means beforeText never occurred
		if (count of allSections) = 1 then return {}
		
		-- the first piece is whatever preceded the first beforeText; drop it
		set candidateSections to items 2 through -1 of allSections
		
		set matches to {}
		repeat with sectionIndex from 1 to (count of candidateSections)
			set pieces to my parseChars((item sectionIndex of candidateSections) as string, afterText)
			-- more than one piece means afterText was present in this section
			if (count of pieces) is not 1 then
				set end of matches to (item 1 of pieces) as string
			end if
		end repeat
		
		return matches
	on error errMsg number errNum
		-- will not error if parsing datum not found, will return empty list (see above)
		error "getTextBetweenMultiple FAILED: " & errMsg number errNum
	end try
end getTextBetweenMultiple