{-# LANGUAGE OverloadedStrings, TupleSections, ViewPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE TypeFamilies #-}
module Text.HTML.TagStream.Text where
import Control.Applicative
import Control.Monad (unless, when, liftM)
import qualified Control.Monad.Fail as Fail
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.Resource (MonadThrow)
import Data.Char
import qualified Data.Conduit.List as CL
import Data.Default
import Prelude hiding (mapM)
import qualified Data.Attoparsec.ByteString.Char8 as S
import Data.Attoparsec.Text
import Data.ByteString (ByteString)
import qualified Data.CaseInsensitive as CI
import Data.Conduit
import Data.Functor.Identity (runIdentity)
import Data.Maybe (fromMaybe)
import Data.Monoid (mconcat)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.Builder as B
import Data.Traversable (mapM)
import qualified Text.XML.Stream.Parse as XML
#if MIN_VERSION_conduit(1, 0, 0)
#else
import Data.Conduit.Internal (pipeL)
#endif
import qualified Data.Conduit.List as C
import qualified Data.Conduit.Attoparsec as C
import qualified Data.Conduit.Text as C
import qualified Text.HTML.TagStream.ByteString as S
import Text.HTML.TagStream.Entities
import Text.HTML.TagStream.Types
import Text.HTML.TagStream.Utils (splitAccum)
type Token = Token' Text
type Attr = Attr' Text
quoted :: Char -> Parser Text
quoted :: Char -> Parser Text
quoted q :: Char
q = Text -> Text -> Text
T.append (Text -> Text -> Text) -> Parser Text -> Parser Text (Text -> Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeTill ((Char, Char) -> Char -> Bool
forall a. Eq a => (a, a) -> a -> Bool
in2 ('\\',Char
q))
Parser Text (Text -> Text) -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Char -> Parser Char
char Char
q Parser Char -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure ""
Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser Char
char '\\' Parser Char -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Int -> Parser Text -> Parser Text
atLeast 1 (Char -> Parser Text
quoted Char
q) )
quotedOr :: Parser Text -> Parser Text
quotedOr :: Parser Text -> Parser Text
quotedOr p :: Parser Text
p = Parser Char -> Parser (Maybe Char)
forall a. Parser a -> Parser (Maybe a)
maybeP ((Char -> Bool) -> Parser Char
satisfy ((Char, Char) -> Char -> Bool
forall a. Eq a => (a, a) -> a -> Bool
in2 ('"','\''))) Parser (Maybe Char) -> (Maybe Char -> Parser Text) -> Parser Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
Parser Text -> (Char -> Parser Text) -> Maybe Char -> Parser Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Parser Text
p Char -> Parser Text
quoted
attrValue :: Parser Text
attrValue :: Parser Text
attrValue = Parser Text -> Parser Text
quotedOr (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Parser Text
takeTill ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='>') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
attrName :: Parser Text
attrName :: Parser Text
attrName = Parser Text -> Parser Text
quotedOr (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$
Char -> Text -> Text
T.cons (Char -> Text -> Text) -> Parser Char -> Parser Text (Text -> Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/='>')
Parser Text (Text -> Text) -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser Text
takeTill ((Char, Char, Char) -> Char -> Bool
forall a. Eq a => (a, a, a) -> a -> Bool
in3 ('/','>','=') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
tagEnd :: Parser Bool
tagEnd :: Parser Bool
tagEnd = Char -> Parser Char
char '>' Parser Char -> Parser Bool -> Parser Bool
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
Parser Bool -> Parser Bool -> Parser Bool
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
string "/>" Parser Text -> Parser Bool -> Parser Bool
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True
attr :: Parser Attr
attr :: Parser Attr
attr = (,) (Text -> Text -> Attr) -> Parser Text -> Parser Text (Text -> Attr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
attrName Parser Text (Text -> Attr)
-> Parser Text () -> Parser Text (Text -> Attr)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Text ()
skipSpace
Parser Text (Text -> Attr) -> Parser Text -> Parser Attr
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Parser Char -> Parser Bool
forall a. Parser a -> Parser Bool
boolP (Char -> Parser Char
char '=') Parser Bool -> (Bool -> Parser Text) -> Parser Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
Parser Text -> Parser Text -> Bool -> Parser Text
forall a. a -> a -> Bool -> a
cond (Parser Text ()
skipSpace Parser Text () -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
attrValue)
(Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure "")
)
attrs :: Parser ([Attr], Bool)
attrs :: Parser ([Attr], Bool)
attrs = [Attr] -> Parser ([Attr], Bool)
loop []
where
loop :: [Attr] -> Parser ([Attr], Bool)
loop acc :: [Attr]
acc = Parser Text ()
skipSpace Parser Text ()
-> Parser Text (Either Bool Attr) -> Parser Text (Either Bool Attr)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Bool -> Either Bool Attr
forall a b. a -> Either a b
Left (Bool -> Either Bool Attr)
-> Parser Bool -> Parser Text (Either Bool Attr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bool
tagEnd Parser Text (Either Bool Attr)
-> Parser Text (Either Bool Attr) -> Parser Text (Either Bool Attr)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Attr -> Either Bool Attr
forall a b. b -> Either a b
Right (Attr -> Either Bool Attr)
-> Parser Attr -> Parser Text (Either Bool Attr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Attr
attr) Parser Text (Either Bool Attr)
-> (Either Bool Attr -> Parser ([Attr], Bool))
-> Parser ([Attr], Bool)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
(Bool -> Parser ([Attr], Bool))
-> (Attr -> Parser ([Attr], Bool))
-> Either Bool Attr
-> Parser ([Attr], Bool)
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
(([Attr], Bool) -> Parser ([Attr], Bool)
forall (m :: * -> *) a. Monad m => a -> m a
return (([Attr], Bool) -> Parser ([Attr], Bool))
-> (Bool -> ([Attr], Bool)) -> Bool -> Parser ([Attr], Bool)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Attr] -> [Attr]
forall a. [a] -> [a]
reverse [Attr]
acc,))
([Attr] -> Parser ([Attr], Bool)
loop ([Attr] -> Parser ([Attr], Bool))
-> (Attr -> [Attr]) -> Attr -> Parser ([Attr], Bool)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Attr -> [Attr] -> [Attr]
forall a. a -> [a] -> [a]
:[Attr]
acc))
comment :: Parser Token
= Text -> Token
forall s. s -> Token' s
Comment (Text -> Token) -> Parser Text -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
comment'
where comment' :: Parser Text
comment' = Text -> Text -> Text
T.append (Text -> Text -> Text) -> Parser Text -> Parser Text (Text -> Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='-')
Parser Text (Text -> Text) -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Text -> Parser Text
string "-->" Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
forall (m :: * -> *) a. Monad m => a -> m a
return ""
Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Int -> Parser Text -> Parser Text
atLeast 1 Parser Text
comment' )
special :: Parser Token
special :: Parser Token
special = Text -> Text -> Token
forall s. s -> s -> Token' s
Special
(Text -> Text -> Token)
-> Parser Text -> Parser Text (Text -> Token)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ( Char -> Text -> Text
T.cons (Char -> Text -> Text) -> Parser Char -> Parser Text (Text -> Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='-') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace))
Parser Text (Text -> Text) -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser Text
takeTill ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='>') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
Parser Text -> Parser Text () -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Text ()
skipSpace )
Parser Text (Text -> Token) -> Parser Text -> Parser Token
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser Text
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='>') Parser Token -> Parser Char -> Parser Token
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char '>'
tag :: Parser Token
tag :: Parser Token
tag = do
TagType
t <- Text -> Parser Text
string "/" Parser Text -> Parser Text TagType -> Parser Text TagType
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> TagType -> Parser Text TagType
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeClose
Parser Text TagType -> Parser Text TagType -> Parser Text TagType
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
string "!" Parser Text -> Parser Text TagType -> Parser Text TagType
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> TagType -> Parser Text TagType
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeSpecial
Parser Text TagType -> Parser Text TagType -> Parser Text TagType
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> TagType -> Parser Text TagType
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeNormal
case TagType
t of
TagTypeClose ->
Text -> Token
forall s. s -> Token' s
TagClose (Text -> Token) -> Parser Text -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='>')
Parser Token -> Parser Char -> Parser Token
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char '>'
TagTypeSpecial -> Parser Text -> Parser Bool
forall a. Parser a -> Parser Bool
boolP (Text -> Parser Text
string "--") Parser Bool -> (Bool -> Parser Token) -> Parser Token
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
Parser Token -> Parser Token -> Bool -> Parser Token
forall a. a -> a -> Bool -> a
cond Parser Token
comment Parser Token
special
TagTypeNormal -> do
Text
name <- (Char -> Bool) -> Parser Text
takeTill ((Char, Char, Char) -> Char -> Bool
forall a. Eq a => (a, a, a) -> a -> Bool
in3 ('<','>','/') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
(as :: [Attr]
as, close :: Bool
close) <- Parser ([Attr], Bool)
attrs
Token -> Parser Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> Parser Token) -> Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ Text -> [Attr] -> Bool -> Token
forall s. s -> [Attr' s] -> Bool -> Token' s
TagOpen Text
name [Attr]
as Bool
close
incomplete :: Parser Token
incomplete :: Parser Token
incomplete = Text -> Token
forall s. s -> Token' s
Incomplete (Text -> Token) -> (Text -> Text) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text -> Text
T.cons '<' (Text -> Token) -> Parser Text -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
takeText
text :: Parser Token
text :: Parser Token
text = Text -> Token
forall s. s -> Token' s
Text (Text -> Token) -> Parser Text -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Parser Text -> Parser Text
atLeast 1 ((Char -> Bool) -> Parser Text
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='<'))
decodeEntitiesText :: Monad m => Conduit Token m Token
decodeEntitiesText :: Conduit Token m Token
decodeEntitiesText =
Dec Builder Text -> Conduit Token m Token
forall (m :: * -> *) builder string.
(Monad m, Monoid builder, Monoid string, IsString string,
Eq string) =>
Dec builder string -> Conduit (Token' string) m (Token' string)
decodeEntities
Dec :: forall builder string.
(builder -> string)
-> ((Char -> Bool) -> string -> (string, string))
-> (string -> builder)
-> (Int -> string -> string)
-> (string -> Maybe string)
-> (string -> Maybe (Char, string))
-> Dec builder string
Dec { decToS :: Builder -> Text
decToS = Text -> Text
L.toStrict (Text -> Text) -> (Builder -> Text) -> Builder -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> Text
B.toLazyText
, decBreak :: (Char -> Bool) -> Text -> Attr
decBreak = (Char -> Bool) -> Text -> Attr
T.break
, decBuilder :: Text -> Builder
decBuilder = Text -> Builder
B.fromText
, decDrop :: Int -> Text -> Text
decDrop = Int -> Text -> Text
T.drop
, decEntity :: Text -> Maybe Text
decEntity = Text -> Maybe Text
forall (m :: * -> *). MonadThrow m => Text -> m Text
decodeEntity
, decUncons :: Text -> Maybe (Char, Text)
decUncons = Text -> Maybe (Char, Text)
T.uncons }
where decodeEntity :: Text -> m Text
decodeEntity entity :: Text
entity =
[Text] -> ConduitT () Text m ()
forall (m :: * -> *) a i. Monad m => [a] -> ConduitT i a m ()
CL.sourceList ["&",Text
entity,";"]
ConduitT () Text m ()
-> ConduitT Text EventPos m () -> ConduitT () EventPos m ()
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
$= ParseSettings -> ConduitT Text EventPos m ()
forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT Text EventPos m ()
XML.parseTextPos ParseSettings
forall a. Default a => a
def { psDecodeEntities :: DecodeEntities
XML.psDecodeEntities = DecodeEntities
XML.decodeHtmlEntities }
ConduitT () EventPos m ()
-> ConduitT EventPos Event m () -> ConduitT () Event m ()
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
$= (EventPos -> Event) -> ConduitT EventPos Event m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
CL.map EventPos -> Event
forall a b. (a, b) -> b
snd
ConduitT () Event m () -> Sink Event m Text -> m Text
forall (m :: * -> *) a b.
Monad m =>
Source m a -> Sink a m b -> m b
$$ Sink Event m Text
forall (m :: * -> *) o. MonadThrow m => ConduitT Event o m Text
XML.content
token :: Parser Token
token :: Parser Token
token = Char -> Parser Char
char '<' Parser Char -> Parser Token -> Parser Token
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Parser Token
tag Parser Token -> Parser Token -> Parser Token
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
incomplete)
Parser Token -> Parser Token -> Parser Token
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
text
tillScriptEnd :: Token -> Parser [Token]
tillScriptEnd :: Token -> Parser [Token]
tillScriptEnd t :: Token
t = [Token] -> [Token]
forall a. [a] -> [a]
reverse ([Token] -> [Token]) -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Token] -> Parser [Token]
loop [Token
t]
Parser [Token] -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:[]) (Token -> [Token]) -> (Text -> Token) -> Text -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Token
forall s. s -> Token' s
Incomplete (Text -> Token) -> (Text -> Text) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text -> Text
T.append Text
script (Text -> [Token]) -> Parser Text -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
takeText
where
script :: Text
script = Text -> Text
L.toStrict (Text -> Text) -> (Builder -> Text) -> Builder -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> Text
B.toLazyText (Builder -> Text) -> Builder -> Text
forall a b. (a -> b) -> a -> b
$ (Text -> Text) -> Token -> Builder
showToken Text -> Text
forall a. a -> a
id Token
t
loop :: [Token] -> Parser [Token]
loop acc :: [Token]
acc = (Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:[Token]
acc) (Token -> [Token]) -> Parser Token -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Token
scriptEnd
Parser [Token] -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Token
text Parser Token -> (Token -> Parser [Token]) -> Parser [Token]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Token] -> Parser [Token]
loop ([Token] -> Parser [Token])
-> (Token -> [Token]) -> Token -> Parser [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:[Token]
acc))
scriptEnd :: Parser Token
scriptEnd = Text -> Parser Text
string "</script>" Parser Text -> Parser Token -> Parser Token
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Token -> Parser Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Token
forall s. s -> Token' s
TagClose "script")
html :: Parser [Token]
html :: Parser [Token]
html = Parser [Token]
tokens Parser [Token] -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> [Token] -> Parser [Token]
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
where
tokens :: Parser [Token]
tokens :: Parser [Token]
tokens = do
Token
t <- Parser Token
token
case Token
t of
(TagOpen name :: Text
name _ close :: Bool
close)
| Bool -> Bool
not Bool
close Bool -> Bool -> Bool
&& Text
nameText -> Text -> Bool
forall a. Eq a => a -> a -> Bool
=="script"
-> [Token] -> [Token] -> [Token]
forall a. [a] -> [a] -> [a]
(++) ([Token] -> [Token] -> [Token])
-> Parser [Token] -> Parser Text ([Token] -> [Token])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Token -> Parser [Token]
tillScriptEnd Token
t Parser Text ([Token] -> [Token])
-> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser [Token]
html
_ -> (Token
tToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:) ([Token] -> [Token]) -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser [Token]
html
decode :: Text -> Either String [Token]
decode :: Text -> Either String [Token]
decode = ([Token] -> [Token])
-> Either String [Token] -> Either String [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Token] -> [Token]
forall (t :: * -> *). Foldable t => t Token -> [Token]
decodeEntitiesText' (Either String [Token] -> Either String [Token])
-> (Text -> Either String [Token]) -> Text -> Either String [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser [Token] -> Text -> Either String [Token]
forall a. Parser a -> Text -> Either String a
parseOnly Parser [Token]
html
where
decodeEntitiesText' :: t Token -> [Token]
decodeEntitiesText' tokens :: t Token
tokens = Identity [Token] -> [Token]
forall a. Identity a -> a
runIdentity (Identity [Token] -> [Token]) -> Identity [Token] -> [Token]
forall a b. (a -> b) -> a -> b
$ (Token -> ConduitT () Token Identity ())
-> t Token -> ConduitT () Token Identity ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ Token -> ConduitT () Token Identity ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield t Token
tokens ConduitT () Token Identity ()
-> Sink Token Identity [Token] -> Identity [Token]
forall (m :: * -> *) a b.
Monad m =>
Source m a -> Sink a m b -> m b
$$ Conduit Token Identity Token
forall (m :: * -> *). Monad m => Conduit Token m Token
decodeEntitiesText Conduit Token Identity Token
-> Sink Token Identity [Token] -> Sink Token Identity [Token]
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
=$ Sink Token Identity [Token]
forall (m :: * -> *) a o. Monad m => ConduitT a o m [a]
CL.consume
atLeast :: Int -> Parser Text -> Parser Text
atLeast :: Int -> Parser Text -> Parser Text
atLeast 0 p :: Parser Text
p = Parser Text
p
atLeast n :: Int
n p :: Parser Text
p = Char -> Text -> Text
T.cons (Char -> Text -> Text) -> Parser Char -> Parser Text (Text -> Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar Parser Text (Text -> Text) -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> Parser Text -> Parser Text
atLeast (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) Parser Text
p
cond :: a -> a -> Bool -> a
cond :: a -> a -> Bool -> a
cond a1 :: a
a1 a2 :: a
a2 b :: Bool
b = if Bool
b then a
a1 else a
a2
(||.) :: Applicative f => f Bool -> f Bool -> f Bool
||. :: f Bool -> f Bool -> f Bool
(||.) = (Bool -> Bool -> Bool) -> f Bool -> f Bool -> f Bool
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 Bool -> Bool -> Bool
(||)
in2 :: Eq a => (a,a) -> a -> Bool
in2 :: (a, a) -> a -> Bool
in2 (a1 :: a
a1,a2 :: a
a2) a :: a
a = a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a1 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a2
in3 :: Eq a => (a,a,a) -> a -> Bool
in3 :: (a, a, a) -> a -> Bool
in3 (a1 :: a
a1,a2 :: a
a2,a3 :: a
a3) a :: a
a = a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a1 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a2 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a3
boolP :: Parser a -> Parser Bool
boolP :: Parser a -> Parser Bool
boolP p :: Parser a
p = Parser a
p Parser a -> Parser Bool -> Parser Bool
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True Parser Bool -> Parser Bool -> Parser Bool
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Bool -> Parser Bool
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
maybeP :: Parser a -> Parser (Maybe a)
maybeP :: Parser a -> Parser (Maybe a)
maybeP p :: Parser a
p = a -> Maybe a
forall a. a -> Maybe a
Just (a -> Maybe a) -> Parser a -> Parser (Maybe a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser a
p Parser (Maybe a) -> Parser (Maybe a) -> Parser (Maybe a)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Maybe a -> Parser (Maybe a)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe a
forall a. Maybe a
Nothing
cc :: [Text] -> B.Builder
cc :: [Text] -> Builder
cc = [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> Builder)
-> ([Text] -> [Builder]) -> [Text] -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Builder) -> [Text] -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Builder
B.fromText
showToken :: (Text -> Text) -> Token -> B.Builder
showToken :: (Text -> Text) -> Token -> Builder
showToken hl :: Text -> Text
hl (TagOpen name :: Text
name as :: [Attr]
as close :: Bool
close) =
[Text] -> Builder
cc ([Text] -> Builder) -> [Text] -> Builder
forall a b. (a -> b) -> a -> b
$ [Text -> Text
hl "<", Text
name]
[Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ (Attr -> Text) -> [Attr] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Attr -> Text
showAttr [Attr]
as
[Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ [Text -> Text
hl (if Bool
close then "/>" else ">")]
where
showAttr :: Attr -> Text
showAttr :: Attr -> Text
showAttr (key :: Text
key, value :: Text
value) = [Text] -> Text
T.concat ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ [" ", Text
key, Text -> Text
hl "=\""] [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ (Char -> Text) -> String -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Char -> Text
escape (Text -> String
T.unpack Text
value) [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ [Text -> Text
hl "\""]
escape :: Char -> Text
escape '"' = "\\\""
escape '\\' = "\\\\"
escape c :: Char
c = Char -> Text
T.singleton Char
c
showToken hl :: Text -> Text
hl (TagClose name :: Text
name) = [Text] -> Builder
cc [Text -> Text
hl "</", Text
name, Text -> Text
hl ">"]
showToken _ (Text s :: Text
s) = Text -> Builder
B.fromText Text
s
showToken hl :: Text -> Text
hl (Comment s :: Text
s) = [Text] -> Builder
cc [Text -> Text
hl "<!--", Text
s, Text -> Text
hl "-->"]
showToken hl :: Text -> Text
hl (Special name :: Text
name s :: Text
s) = [Text] -> Builder
cc [Text -> Text
hl "<!", Text
name, " ", Text
s, Text -> Text
hl ">"]
showToken _ (Incomplete s :: Text
s) = Text -> Builder
B.fromText Text
s
tokenStream :: Fail.MonadFail m
#if MIN_VERSION_conduit(1, 0, 0)
=> Conduit Text m Token
#else
=> GInfConduit Text m Token
#endif
tokenStream :: Conduit Text m Token
tokenStream =
Text -> Conduit Text m Token
forall (m :: * -> *).
MonadFail m =>
Text -> ConduitT Text Token m ()
loop Text
T.empty Conduit Text m Token
-> ConduitT Token Token m () -> Conduit Text m Token
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
=$= ConduitT Token Token m ()
forall (m :: * -> *). Monad m => Conduit Token m Token
decodeEntitiesText
where
#if MIN_VERSION_conduit(1, 0, 0)
loop :: Text -> ConduitT Text Token m ()
loop accum :: Text
accum = ConduitT Text Token m (Maybe Text)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT Text Token m (Maybe Text)
-> (Maybe Text -> ConduitT Text Token m ())
-> ConduitT Text Token m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT Text Token m ()
-> (Text -> ConduitT Text Token m ())
-> Maybe Text
-> ConduitT Text Token m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (Text -> () -> ConduitT Text Token m ()
forall (m :: * -> *) b i.
Monad m =>
Text -> b -> ConduitT i Token m b
close Text
accum ()) (Text -> Text -> ConduitT Text Token m ()
push Text
accum)
#else
loop accum = awaitE >>= either (close accum) (push accum)
#endif
push :: Text -> Text -> ConduitT Text Token m ()
push accum :: Text
accum input :: Text
input =
case Parser [Token] -> Text -> Either String [Token]
forall a. Parser a -> Text -> Either String a
parseOnly Parser [Token]
html (Text
accum Text -> Text -> Text
`T.append` Text
input) of
Right ([Token] -> (Text, [Token])
forall s. Monoid s => [Token' s] -> (s, [Token' s])
splitAccum -> (accum' :: Text
accum', tokens :: [Token]
tokens)) -> (Token -> ConduitT Text Token m ())
-> [Token] -> ConduitT Text Token m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ Token -> ConduitT Text Token m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield [Token]
tokens ConduitT Text Token m ()
-> ConduitT Text Token m () -> ConduitT Text Token m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> ConduitT Text Token m ()
loop Text
accum'
Left err :: String
err -> String -> ConduitT Text Token m ()
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
err
close :: Text -> b -> ConduitT i Token m b
close s :: Text
s r :: b
r = do
Bool -> ConduitT i Token m () -> ConduitT i Token m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Text -> Bool
T.null Text
s) (ConduitT i Token m () -> ConduitT i Token m ())
-> ConduitT i Token m () -> ConduitT i Token m ()
forall a b. (a -> b) -> a -> b
$ Token -> ConduitT i Token m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Token -> ConduitT i Token m ()) -> Token -> ConduitT i Token m ()
forall a b. (a -> b) -> a -> b
$ Text -> Token
forall s. s -> Token' s
Text Text
s
b -> ConduitT i Token m b
forall (m :: * -> *) a. Monad m => a -> m a
return b
r
tokenStreamBS :: (MonadThrow m, Fail.MonadFail m)
#if MIN_VERSION_conduit(1, 0, 0)
=> Conduit ByteString m Token
#else
=> GLInfConduit ByteString m Token
#endif
tokenStreamBS :: Conduit ByteString m Token
tokenStreamBS = do
Token
tk <- Parser ByteString Token -> ConduitT ByteString Token m Token
forall a (m :: * -> *) b o.
(AttoparsecInput a, MonadThrow m) =>
Parser a b -> ConduitT a o m b
C.sinkParser (Parser ()
skipBOM Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
S.skipSpace Parser () -> Parser ByteString Char -> Parser ByteString Char
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> Parser ByteString Char
S.char '<' Parser ByteString Char
-> Parser ByteString Token -> Parser ByteString Token
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ByteString Token
S.tag)
let (mencoding :: Maybe ByteString
mencoding, yieldToken :: Bool
yieldToken) =
case Token
tk of
(TagOpen "?xml" as :: [Attr' ByteString]
as _) ->
(ByteString -> [Attr' ByteString] -> Maybe ByteString
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup "encoding" [Attr' ByteString]
as, Bool
False)
_ -> (Maybe ByteString
forall a. Maybe a
Nothing, Bool
True)
let codec :: Codec
codec = Codec -> Maybe Codec -> Codec
forall a. a -> Maybe a -> a
fromMaybe Codec
C.utf8 (Maybe ByteString
mencoding Maybe ByteString -> (ByteString -> Maybe Codec) -> Maybe Codec
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= CI ByteString -> Maybe Codec
getCodec (CI ByteString -> Maybe Codec)
-> (ByteString -> CI ByteString) -> ByteString -> Maybe Codec
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> CI ByteString
forall s. FoldCase s => s -> CI s
CI.mk)
Bool -> Conduit ByteString m Token -> Conduit ByteString m Token
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
yieldToken (Conduit ByteString m Token -> Conduit ByteString m Token)
-> Conduit ByteString m Token -> Conduit ByteString m Token
forall a b. (a -> b) -> a -> b
$ (m Token -> ConduitT ByteString Token m Token
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift ((ByteString -> m Text) -> Token -> m Token
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (Codec -> ByteString -> m Text
forall (m :: * -> *). MonadThrow m => Codec -> ByteString -> m Text
decodeBS Codec
codec) Token
tk) ConduitT ByteString Token m Token
-> (Token -> Conduit ByteString m Token)
-> Conduit ByteString m Token
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Token -> Conduit ByteString m Token
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield) Conduit ByteString m Token
-> ConduitT Token Token m () -> Conduit ByteString m Token
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
=$= ConduitT Token Token m ()
forall (m :: * -> *). Monad m => Conduit Token m Token
decodeEntitiesText
#if MIN_VERSION_conduit(1, 0, 0)
Codec -> ConduitT ByteString Text m ()
forall (m :: * -> *).
MonadThrow m =>
Codec -> ConduitT ByteString Text m ()
C.decode Codec
codec ConduitT ByteString Text m ()
-> ConduitT Text Token m () -> Conduit ByteString m Token
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
=$= ConduitT Text Token m ()
forall (m :: * -> *). MonadFail m => Conduit Text m Token
tokenStream
#else
C.decode codec `pipeL` tokenStream
#endif
where
skipBOM :: S.Parser ()
skipBOM :: Parser ()
skipBOM =
( ByteString -> Parser ByteString
S.string "\xff\xfe"
Parser ByteString -> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ByteString -> Parser ByteString
S.string "\xef\xbb\xbf"
) Parser ByteString -> Parser () -> Parser ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> () -> Parser ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> () -> Parser ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
getCodec :: CI.CI ByteString -> Maybe C.Codec
getCodec :: CI ByteString -> Maybe Codec
getCodec c :: CI ByteString
c =
case CI ByteString
c of
"utf-8" -> Codec -> Maybe Codec
forall a. a -> Maybe a
Just Codec
C.utf8
"utf8" -> Codec -> Maybe Codec
forall a. a -> Maybe a
Just Codec
C.utf8
"iso8859" -> Codec -> Maybe Codec
forall a. a -> Maybe a
Just Codec
C.iso8859_1
_ -> Maybe Codec
forall a. Maybe a
Nothing
decodeBS :: Codec -> ByteString -> m Text
decodeBS codec :: Codec
codec bs :: ByteString
bs = ([Text] -> Text) -> m [Text] -> m Text
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM [Text] -> Text
T.concat (m [Text] -> m Text) -> m [Text] -> m Text
forall a b. (a -> b) -> a -> b
$ ByteString -> ConduitT () ByteString m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield ByteString
bs ConduitT () ByteString m ()
-> ConduitT ByteString Text m () -> ConduitT () Text m ()
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
$= Codec -> ConduitT ByteString Text m ()
forall (m :: * -> *).
MonadThrow m =>
Codec -> ConduitT ByteString Text m ()
C.decode Codec
codec ConduitT () Text m () -> Sink Text m [Text] -> m [Text]
forall (m :: * -> *) a b.
Monad m =>
Source m a -> Sink a m b -> m b
$$ Sink Text m [Text]
forall (m :: * -> *) a o. Monad m => ConduitT a o m [a]
C.consume