module Text.XML.HXT.Arrow.ProcessDocument
( parseXmlDocument
, parseXmlDocumentWithExpat
, parseHtmlDocument
, validateDocument
, propagateAndValidateNamespaces
, andValidateNamespaces
, getDocumentContents
)
where
import Control.Arrow
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowList
import Control.Arrow.ArrowTree
import Control.Arrow.ListArrow (fromLA)
import Control.Arrow.NTreeEdit
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.XmlState.TypeDefs
import Text.XML.HXT.Arrow.ParserInterface (parseHtmlDoc,
parseXmlDoc)
import Text.XML.HXT.Arrow.Edit (substAllXHTMLEntityRefs,
transfAllCharRef)
import Text.XML.HXT.Arrow.GeneralEntitySubstitution (processGeneralEntities)
import Text.XML.HXT.Arrow.DTDProcessing (processDTD)
import Text.XML.HXT.Arrow.DocumentInput (getXmlContents)
import Text.XML.HXT.Arrow.Namespace (propagateNamespaces, validateNamespaces)
import Text.XML.HXT.DTDValidation.Validation (generalEntitiesDefined,
getDTDSubset,
transform,
validate)
parseXmlDocument :: Bool -> Bool -> Bool -> Bool -> IOStateArrow s XmlTree XmlTree
parseXmlDocument :: forall s.
Bool -> Bool -> Bool -> Bool -> IOStateArrow s XmlTree XmlTree
parseXmlDocument Bool
validateD Bool
substDTD Bool
substHTML Bool
validateRX
= ( forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b) -> a (t b) (t b)
replaceChildren ( ( forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_source
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&&
forall (a :: * -> * -> *) n.
ArrowXml a =>
a n XmlTree -> a n String
xshow forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b)
getChildren
)
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall (a :: * -> * -> *). ArrowXml a => a (String, String) XmlTree
parseXmlDoc
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. IOStateArrow s XmlTree XmlTree
filterErrorMsg
)
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState String
"parse XML document"
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
( forall (a :: * -> * -> *) b c d.
ArrowIf a =>
a b c -> a b d -> a b d -> a b d
ifA (forall (a :: * -> * -> *) b c. ArrowList a => LA b c -> a b c
fromLA XmlArrow
getDTDSubset)
( forall s. IOStateArrow s XmlTree XmlTree
processDTDandEntities
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
( if Bool
validate'
then forall s. IOStateArrow s XmlTree XmlTree
validateDocument
else forall (a :: * -> * -> *) b. ArrowList a => a b b
this
)
)
( if Bool
validate'
then forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
2 String
"checkUndefinedEntityRefs: looking for undefined entity refs"
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall (a :: * -> * -> *) b c. ArrowList a => a b c -> a b b
perform forall s. IOStateArrow s XmlTree XmlTree
checkUndefinedEntityRefs
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
2 String
"checkUndefinedEntityRefs: looking for undefined entity refs done"
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState String
"decoding document"
else forall (a :: * -> * -> *) b. ArrowList a => a b b
this
)
)
)
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when` forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
documentStatusOk
where
validate' :: Bool
validate'
= Bool
validateD Bool -> Bool -> Bool
&& Bool -> Bool
not Bool
validateRX
processDTDandEntities :: IOSLA (XIOState s) XmlTree XmlTree
processDTDandEntities
= ( if Bool
validateD Bool -> Bool -> Bool
|| Bool
substDTD
then forall s. IOStateArrow s XmlTree XmlTree
processDTD
else forall (a :: * -> * -> *) b. ArrowList a => a b b
this
)
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
( if Bool
substDTD
then ( forall s. IOStateArrow s XmlTree XmlTree
processGeneralEntities
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when`
forall (a :: * -> * -> *) b c. ArrowList a => LA b c -> a b c
fromLA XmlArrow
generalEntitiesDefined
)
else if Bool
substHTML
then forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
substAllXHTMLEntityRefs
else forall (a :: * -> * -> *) b. ArrowList a => a b b
this
)
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
transfAllCharRef
checkUndefinedEntityRefs :: IOStateArrow s XmlTree XmlTree
checkUndefinedEntityRefs :: forall s. IOStateArrow s XmlTree XmlTree
checkUndefinedEntityRefs
= forall (a :: * -> * -> *) (t :: * -> *) b c.
(ArrowTree a, Tree t) =>
a (t b) c -> a (t b) c
deep forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
isEntityRef
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall (a :: * -> * -> *). ArrowXml a => a XmlTree String
getEntityRef
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr (\ String
en -> String
"general entity reference \"&" forall a. [a] -> [a] -> [a]
++ String
en forall a. [a] -> [a] -> [a]
++ String
";\" is undefined")
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall (a :: * -> * -> *). ArrowXml a => Int -> a String XmlTree
mkError Int
c_err
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. IOStateArrow s XmlTree XmlTree
filterErrorMsg
parseXmlDocumentWithExpat :: IOStateArrow s XmlTree XmlTree
parseXmlDocumentWithExpat :: forall s. IOStateArrow s XmlTree XmlTree
parseXmlDocumentWithExpat
= ( forall b c s0. IOSArrow b c -> IOStateArrow s0 b c
withoutUserState forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState (IOSArrow XmlTree XmlTree)
theExpatParser
)
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when` forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
documentStatusOk
parseHtmlDocument :: IOStateArrow s XmlTree XmlTree
parseHtmlDocument :: forall s. IOStateArrow s XmlTree XmlTree
parseHtmlDocument
= ( forall (a :: * -> * -> *) b c. ArrowList a => a b c -> a b b
perform ( forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_source
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall b s. Int -> (b -> String) -> IOStateArrow s b b
traceValue Int
1 ((String
"parseHtmlDoc: parse HTML document " forall a. [a] -> [a] -> [a]
++) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> String
show)
)
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
( forall {s0}. (Bool, Bool) -> IOSLA (XIOState s0) XmlTree XmlTree
parseHtml forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar (Selector XIOSysState Bool
theTagSoup forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&. Selector XIOSysState Bool
theExpat) )
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
( forall {s0}. (Bool, Bool) -> IOSLA (XIOState s0) XmlTree XmlTree
removeWarnings forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar (Selector XIOSysState Bool
theWarnings forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&. Selector XIOSysState Bool
theTagSoup) )
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState String
"parse HTML document"
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. IOStateArrow s XmlTree XmlTree
traceTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. IOStateArrow s XmlTree XmlTree
traceSource
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall (a :: * -> * -> *) b c. ArrowList a => a b c -> a b b
perform ( forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_source
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall b s. Int -> (b -> String) -> IOStateArrow s b b
traceValue Int
1 (\ String
src -> String
"parse HTML document " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show String
src forall a. [a] -> [a] -> [a]
++ String
" finished")
)
)
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when` forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
documentStatusOk
where
parseHtml :: (Bool, Bool) -> IOSLA (XIOState s0) XmlTree XmlTree
parseHtml (Bool
withTagSoup', Bool
withExpat')
| Bool
withExpat' = forall b c s0. IOSArrow b c -> IOStateArrow s0 b c
withoutUserState forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState (IOSArrow XmlTree XmlTree)
theExpatParser
| Bool
withTagSoup' = forall b c s0. IOSArrow b c -> IOStateArrow s0 b c
withoutUserState forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState (IOSArrow XmlTree XmlTree)
theTagSoupParser
| Bool
otherwise = forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 (String
"parse document with parsec HTML parser")
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b) -> a (t b) (t b)
replaceChildren
( ( forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_source
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&&
forall (a :: * -> * -> *) n.
ArrowXml a =>
a n XmlTree -> a n String
xshow forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b)
getChildren
)
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall (a :: * -> * -> *).
ArrowList a =>
a (String, String) XmlTree
parseHtmlDoc
)
removeWarnings :: (Bool, Bool) -> IOSLA (XIOState s) XmlTree XmlTree
removeWarnings (Bool
warnings, Bool
withTagSoup')
| Bool
warnings = forall (a :: * -> * -> *).
ArrowXml a =>
a XmlTree XmlTree -> a XmlTree XmlTree
processTopDownWithAttrl
forall s. IOStateArrow s XmlTree XmlTree
filterErrorMsg
| Bool
withTagSoup' = forall (a :: * -> * -> *) b. ArrowList a => a b b
this
| Bool
otherwise = forall (a :: * -> * -> *) b c. ArrowList a => LA b c -> a b c
fromLA forall a b. (a -> b) -> a -> b
$
forall b c.
[IfThen (LA (NTree b) c) (LA (NTree b) (NTree b))]
-> LA (NTree b) (NTree b)
editNTreeA [forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
isError forall a b. a -> b -> IfThen a b
:-> forall (a :: * -> * -> *) b c. ArrowList a => a b c
none]
validateDocument :: IOStateArrow s XmlTree XmlTree
validateDocument :: forall s. IOStateArrow s XmlTree XmlTree
validateDocument
= ( forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 String
"validating document"
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall (a :: * -> * -> *) b c. ArrowList a => a b c -> a b b
perform ( forall (a :: * -> * -> *). ArrowList a => a XmlTree XmlTree
validateDoc
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. IOStateArrow s XmlTree XmlTree
filterErrorMsg
)
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState String
"document validation"
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 String
"document validated, transforming doc with respect to DTD"
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall (a :: * -> * -> *). ArrowList a => a XmlTree XmlTree
transformDoc
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 String
"document transformed"
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. IOStateArrow s XmlTree XmlTree
traceSource
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. IOStateArrow s XmlTree XmlTree
traceTree
)
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when`
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
documentStatusOk
propagateAndValidateNamespaces :: IOStateArrow s XmlTree XmlTree
propagateAndValidateNamespaces :: forall s. IOStateArrow s XmlTree XmlTree
propagateAndValidateNamespaces
= ( forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 String
"propagating namespaces"
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
propagateNamespaces
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. String -> IOStateArrow s XmlTree XmlTree
traceDoc String
"propagating namespaces done"
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. IOStateArrow s XmlTree XmlTree
andValidateNamespaces
)
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when`
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
documentStatusOk
andValidateNamespaces :: IOStateArrow s XmlTree XmlTree
andValidateNamespaces :: forall s. IOStateArrow s XmlTree XmlTree
andValidateNamespaces
= ( forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 String
"validating namespaces"
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
( forall s. String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState String
"namespace propagation"
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when`
( forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
validateNamespaces forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall (a :: * -> * -> *) b c. ArrowList a => a b c -> a b b
perform forall s. IOStateArrow s XmlTree XmlTree
filterErrorMsg )
)
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 String
"namespace validation finished"
)
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when`
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
documentStatusOk
getDocumentContents :: String -> IOStateArrow s b XmlTree
getDocumentContents :: forall s b. String -> IOStateArrow s b XmlTree
getDocumentContents String
src
= forall (a :: * -> * -> *) n.
ArrowXml a =>
[a n XmlTree] -> [a n XmlTree] -> a n XmlTree
root [] []
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr String
a_source String
src
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 (String
"readDocument: start processing document " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show String
src)
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. IOStateArrow s XmlTree XmlTree
getXmlContents
validateDoc :: ArrowList a => a XmlTree XmlTree
validateDoc :: forall (a :: * -> * -> *). ArrowList a => a XmlTree XmlTree
validateDoc = forall (a :: * -> * -> *) b c. ArrowList a => LA b c -> a b c
fromLA ( XmlArrow
validate
forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when`
XmlArrow
getDTDSubset
)
transformDoc :: ArrowList a => a XmlTree XmlTree
transformDoc :: forall (a :: * -> * -> *). ArrowList a => a XmlTree XmlTree
transformDoc = forall (a :: * -> * -> *) b c. ArrowList a => LA b c -> a b c
fromLA XmlArrow
transform