{-# LANGUAGE NoImplicitPrelude #-} module Language.Haskell.Brittany.Internal.WriteBriDoc.AlignmentAlgo ( alignColsLines ) where import Language.Haskell.Brittany.Internal.Prelude import qualified Control.Monad.Trans.State.Strict as StateS -- import qualified Data.Either as Either import qualified Data.Foldable as Foldable import qualified Data.IntMap.Lazy as IntMapL import qualified Data.IntMap.Strict as IntMapS import qualified Data.Sequence as Seq import qualified Data.Text as Text import qualified GHC.OldList as List import Language.Haskell.Brittany.Internal.Config.Types import Language.Haskell.Brittany.Internal.Utils import Language.Haskell.Brittany.Internal.WriteBriDoc.Operators import Language.Haskell.Brittany.Internal.WriteBriDoc.Types import Language.Haskell.Brittany.Internal.Components.BriDoc type ColIndex = Int data ColumnSpacing = ColumnSpacingLeaf Int | ColumnSpacingRef Int Int type ColumnBlock a = [a] type ColumnBlocks a = Seq [a] type ColMap1 = IntMapL.IntMap {- ColIndex -} (Bool, ColumnBlocks ColumnSpacing) type ColMap2 = IntMapL.IntMap {- ColIndex -} (Float, ColumnBlock Int, ColumnBlocks Int) -- (ratio of hasSpace, maximum, raw) data ColInfo = ColInfoStart -- start value to begin the mapAccumL. | ColInfoNo BriDoc | ColInfo ColIndex ColSig [(Int, ColInfo)] instance Show ColInfo where show ColInfoStart = "ColInfoStart" show (ColInfoNo bd) = "ColInfoNo " ++ show (take 30 (show (briDocToDoc bd)) ++ "..") show (ColInfo ind sig list) = "ColInfo " ++ show ind ++ " " ++ show sig ++ " " ++ show list data ColBuildState = ColBuildState { _cbs_map :: ColMap1 , _cbs_index :: ColIndex } -- In theory -- ========= -- .. this algorithm works roughly in these steps: -- -- 1. For each line, get the (nested) column info, descending as far as -- BDCols nodes go. The column info is a (rose) tree where the leafs -- are arbitrary (non-BDCols) BriDocs. -- 2. Walk through the lines and compare its column info with that of its -- predecessor. If both are non-leafs and the column "signatures" align -- (they don't align e.g. when they are totally different syntactical -- structures or the number of children differs), mark these parts of -- the two tree structures as connected and recurse to its children -- (i.e. again comparing the children in this line with the children in -- the previous line). -- 3. What we now have is one tree per line, and connections between "same" -- nodes between lines. These connection can span multiple lines. -- We next look at spacing information. This is available at the leafs, -- but in this step we aggregate _over connections_. At the top level, this -- gives us one piece of data: How long would each line be, if we fully -- aligned everything (kept all connections "active"). In contrast to -- just taking the sum of all leafs for each tree, this line length includes -- the spaces used for alignment. -- 4. Treat those lines where alignment would result in overflowing of the -- column limit. This "treatment" is currently configurable, and can e.g. -- mean: -- a) we stop alignment alltogether, -- b) we remove alignment just from the overflowing lines, -- c) we reduce the number of spaces inserted in overflowing lines using -- some technique to make them not overflow, but without reducing the -- space insertion to zero, -- d) don't do anything -- 5. Actually print the lines, walking over each tree and inserting spaces -- according to the info and decisions gathered in the previous steps. -- -- Possible improvements -- ===================== -- -- - If alignment is disabled for specific lines, the aggregated per-connection -- info of those lines is still retained and not recalculated. This can -- result in spaces being inserted to create alignment with a line that -- would overflow and thus gets disabled entirely. -- An better approach would be to repeat step 3 after marking overflowing -- lines as such, and not include the overflowing spacings as references -- for non-overflowing ones. In the simplest case one additional iteration -- would suffice, e.g. 1-2-3-4-3-5, but it would also be possible to refine -- this and first remove alignment in the deepest parts of the tree for -- overflowing lines, repeating and moving upwards until no lines are -- anymore overflowing. -- Further, it may make sense to break up connections when overflowing would -- occur. -- - It may also make sense to not filter all overflowing lines, but remove -- them one-by-one and in each step recalculate the aggregated connection -- spacing info. Because removing one overflowing line from the calculation -- may very well cause another previously overflowing line to not overflow -- any longer. -- There is also a nasty optimization problem hiding in there (find the -- minimal amount of alignment disabling that results in no overflows) -- but that is overkill. -- -- (with both these improvements there would be quite some repetition between -- steps 3 and 4, but it should be possible to ensure termination. Still, -- performance might become an issue as such an approach is not necessarily -- linear in bridoc size any more.) -- -- In practice -- =========== -- -- .. the current implementation is somewhat sloppy. Steps 1 and 2 -- are executed in one step, step 3 already applies one strategy that disables -- certain connections (see `_lconfig_alignmentLimit`) and step 4 does some -- of the calculations one might expect to occur in step 3. Steps 4 and 5 -- are executed in the same recursion, too. -- Also, _lconfig_alignmentLimit really is itself a hack that hides the issue -- mentioned in the first "possible improvement". alignColsLines :: LayoutConstraints m => (BriDoc -> m ()) -> [BriDoc] -> m () alignColsLines layoutBriDocM bridocs = do -- colInfos `forM_` \colInfo -> do -- tellDebugMess ("alignColsLines: at " ++ take 100 (show $ briDocToDoc $ head bridocs)) curX <- do state <- mGet return $ case _lstate_plannedSpace state of PlannedNone -> _lstate_curY state PlannedSameline i -> _lstate_curY state + i PlannedNewline _l -> lstate_baseY state PlannedDelta _ i -> i colMax <- mAsk <&> _conf_layout .> _lconfig_cols .> confUnpack alignMax <- mAsk <&> _conf_layout .> _lconfig_alignmentLimit .> confUnpack alignBreak <- mAsk <&> _conf_layout .> _lconfig_alignmentBreakOnMultiline .> confUnpack case () of _ -> do -- tellDebugMess ("colInfos:\n" ++ List.unlines [ "> " ++ prettyColInfos "> " x | x <- colInfos]) -- tellDebugMess ("processedMap: " ++ show processedMap) sequence_ $ List.intersperse layoutWriteEnsureNewlineBlock $ colInfos <&> processInfo layoutBriDocM colMax processedMap where (colInfos, finalState) = StateS.runState (mergeBriDocs bridocs) (ColBuildState IntMapS.empty 0) -- maxZipper :: [Int] -> [Int] -> [Int] -- maxZipper [] ys = ys -- maxZipper xs [] = xs -- maxZipper (x:xr) (y:yr) = max x y : maxZipper xr yr colAggregation :: [Int] -> Int colAggregation [] = 0 -- this probably cannot happen the way we call -- this function, because _cbs_map only ever -- contains nonempty Seqs. colAggregation xs = maximum [ x | x <- xs, x <= minimum xs + alignMax' ] where alignMax' = max 0 alignMax processedMap :: ColMap2 processedMap = fix $ \result -> _cbs_map finalState <&> \(lastFlag, colSpacingss) -> let colss = colSpacingss <&> \spss -> case reverse spss of [] -> [] (xN : xR) -> reverse $ (if lastFlag then fLast else fInit) xN : fmap fInit xR where fLast (ColumnSpacingLeaf len ) = len fLast (ColumnSpacingRef len _) = len fInit (ColumnSpacingLeaf len) = len fInit (ColumnSpacingRef _ i ) = case IntMapL.lookup i result of Nothing -> 0 Just (_, maxs, _) -> sum maxs maxCols = {-Foldable.foldl1 maxZipper-} fmap colAggregation $ transpose $ Foldable.toList colss (_, posXs) = -- trace ("colss=" ++ show colss ++ ", maxCols=" ++ show maxCols ++ " for " ++ take 100 (show $ briDocToDoc $ head bridocs)) $ mapAccumL (\acc x -> (acc + x, acc)) curX maxCols counter count l = if List.last posXs + List.last l <= colMax then count + 1 else count ratio = fromIntegral (foldl' counter (0 :: Int) colss) / fromIntegral (length colss) in (ratio, maxCols, colss) mergeBriDocs :: [BriDoc] -> StateS.State ColBuildState [ColInfo] mergeBriDocs bds = mergeBriDocsW ColInfoStart bds mergeBriDocsW :: ColInfo -> [BriDoc] -> StateS.State ColBuildState [ColInfo] mergeBriDocsW _ [] = return [] mergeBriDocsW lastInfo (bd : bdr) = do info <- mergeInfoBriDoc True lastInfo bd infor <- mergeBriDocsW -- (if alignBreak && briDocIsMultiLine bd then ColInfoStart else info) (if shouldBreakAfter bd then ColInfoStart else info) bdr return $ info : infor -- even with alignBreak config flag, we don't stop aligning for certain -- ColSigs - the ones with "False" below. The main reason is that -- there are uses of BDCols where they provide the alignment of several -- consecutive full larger code segments, for example ColOpPrefix. -- Motivating example is -- > foo -- > $ [ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -- > , bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb -- > ] -- > ++ [ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccc ] -- If we break the alignment here, then all three lines for the first -- list move left by one, which is horrible. We really don't want to -- break whole-block alignments. -- For list, listcomp, tuple and tuples the reasoning is much simpler: -- alignment should not have much effect anyways, so i simply make the -- choice here that enabling alignment is the safer route for preventing -- potential glitches, and it should never have a negative effect. -- For RecUpdate the argument is much less clear - it is mostly a -- personal preference to not break alignment for those, even if -- multiline. Really, this should be configurable.. (TODO) shouldBreakAfter :: BriDoc -> Bool shouldBreakAfter bd = alignBreak && briDocIsMultiLine bd && case bd of (BDCols ColTyOpPrefix _) -> False (BDCols ColPatternsFuncPrefix _) -> True (BDCols ColPatternsFuncInfix _) -> True (BDCols ColPatterns _) -> True (BDCols ColCasePattern _) -> True (BDCols ColBindingLine{} _) -> True (BDCols ColGuard _) -> True (BDCols ColGuardedBody _) -> True (BDCols ColBindStmt _) -> True (BDCols ColDoLet _) -> True (BDCols ColRec _) -> False (BDCols ColRecUpdate _) -> False (BDCols ColRecDecl _) -> False (BDCols ColListComp _) -> False (BDCols ColList _) -> False (BDCols ColApp{} _) -> True (BDCols ColTuple _) -> False (BDCols ColTuples _) -> False (BDCols ColOpPrefix _) -> False _ -> True mergeInfoBriDoc :: Bool -> ColInfo -> BriDoc -> StateS.StateT ColBuildState Identity ColInfo mergeInfoBriDoc lastFlag ColInfoStart = briDocToColInfo lastFlag mergeInfoBriDoc lastFlag ColInfoNo{} = briDocToColInfo lastFlag mergeInfoBriDoc lastFlag (ColInfo infoInd infoSig subLengthsInfos) = \case brdc@(BDCols colSig subDocs) | infoSig == colSig && length subLengthsInfos == length subDocs -> do let isLastList = if lastFlag then (== length subDocs) <$> [1 ..] else repeat False infos <- zip3 isLastList (snd <$> subLengthsInfos) subDocs `forM` \(lf, info, bd) -> mergeInfoBriDoc lf info bd let curLengths = briDocLineLength <$> subDocs let trueSpacings = getTrueSpacings (zip curLengths infos) do -- update map s <- StateS.get let m = _cbs_map s case IntMapS.lookup infoInd m of Just (_, spaces) -> StateS.put s { _cbs_map = IntMapS.insert infoInd (lastFlag, spaces Seq.|> trueSpacings) m } Nothing -> pure () -- shouldn't be possible return $ ColInfo infoInd colSig (zip curLengths infos) | otherwise -> briDocToColInfo lastFlag brdc brdc -> return $ ColInfoNo brdc processInfo :: LayoutConstraints m => (BriDoc -> m ()) -> Int -> ColMap2 -> ColInfo -> m () processInfo layoutBriDocM maxSpace m = \case ColInfoStart -> error "should not happen (TM)" ColInfoNo doc -> layoutBriDocM doc ColInfo ind _ list -> -- trace ("processInfo ind=" ++ show ind ++ ", list=" ++ show list ++ ", colmap=" ++ show m) $ do colMaxConf <- mAsk <&> _conf_layout .> _lconfig_cols .> confUnpack alignMode <- mAsk <&> _conf_layout .> _lconfig_columnAlignMode .> confUnpack curX <- do state <- mGet return $ case _lstate_plannedSpace state of PlannedNone -> _lstate_curY state PlannedSameline i -> _lstate_curY state + i PlannedNewline _l -> lstate_baseY state PlannedDelta _ i -> i let colMax = min colMaxConf (curX + maxSpace) -- tellDebugMess $ show curX let (ratio, maxCols1, _colss) = case IntMapS.lookup ind m of Just x -> x Nothing -> error "internal brittany error: processInfo bad lookup" let maxCols2 = list <&> \case (_, ColInfo i _ _) | Just (_, ms, _) <- IntMapS.lookup i m -> sum ms (l, _) -> l let maxCols = zipWith max maxCols1 maxCols2 let (maxX, posXs) = mapAccumL (\acc x -> (acc + x, acc)) curX maxCols -- handle the cases that the vertical alignment leads to more than max -- cols: -- this is not a full fix, and we must correct individually in addition. -- because: the (at least) line with the largest element in the last -- column will always still overflow, because we just updated the column -- sizes in such a way that it works _if_ we have sizes (*factor) -- in each column. but in that line, in the last column, we will be -- forced to occupy the full vertical space, not reduced by any factor. let fixedPosXs = case alignMode of ColumnAlignModeAnimouslyScale i | maxX > colMax -> fixed <&> (+ curX) where factor :: Float = -- 0.0001 as an offering to the floating point gods. min 1.0001 (fromIntegral (i + colMax - curX) / fromIntegral (maxX - curX)) offsets = (subtract curX) <$> posXs fixed = offsets <&> fromIntegral .> (* factor) .> truncate _ -> posXs let spacings = zipWith (-) (List.tail fixedPosXs ++ [min maxX colMax]) fixedPosXs -- tellDebugMess $ "ind = " ++ show ind -- tellDebugMess $ "spacings = " ++ show spacings -- tellDebugMess $ "maxCols = " ++ show maxCols -- tellDebugMess $ "fixedPosXs = " ++ show fixedPosXs -- tellDebugMess $ "list = " ++ show list -- tellDebugMess $ "maxSpace = " ++ show maxSpace let alignAct = zip3 fixedPosXs spacings list `forM_` \(destX, s, x) -> do -- tellDebugMess $ "layoutWriteEnsureAbsoluteN " ++ show destX layoutWriteEnsureAbsoluteN destX processInfo layoutBriDocM s m (snd x) noAlignAct = list `forM_` (snd .> processInfoIgnore layoutBriDocM) animousAct = -- trace ("animousAct fixedPosXs=" ++ show fixedPosXs ++ ", list=" ++ show list ++ ", maxSpace=" ++ show maxSpace ++ ", colMax=" ++ show colMax) $ if List.last fixedPosXs + fst (List.last list) > colMax -- per-item check if there is overflowing. then noAlignAct else alignAct case alignMode of ColumnAlignModeDisabled -> noAlignAct ColumnAlignModeUnanimously | maxX <= colMax -> alignAct ColumnAlignModeUnanimously -> noAlignAct ColumnAlignModeMajority limit | ratio >= limit -> animousAct ColumnAlignModeMajority{} -> noAlignAct ColumnAlignModeAnimouslyScale{} -> animousAct ColumnAlignModeAnimously -> animousAct ColumnAlignModeAlways -> alignAct getTrueSpacings :: [(Int, ColInfo)] -> [ColumnSpacing] getTrueSpacings lengthInfos = lengthInfos <&> \case (len, ColInfo i _ _) -> ColumnSpacingRef len i (len, _) -> ColumnSpacingLeaf len withAlloc :: Bool -> ( ColIndex -> StateS.State ColBuildState (ColumnBlocks ColumnSpacing, ColInfo) ) -> StateS.State ColBuildState ColInfo withAlloc lastFlag f = do cbs <- StateS.get let ind = _cbs_index cbs StateS.put $ cbs { _cbs_index = ind + 1 } (space, info) <- f ind StateS.get >>= \c -> StateS.put $ c { _cbs_map = IntMapS.insert ind (lastFlag, space) $ _cbs_map c } return info briDocLineLength :: BriDoc -> Int briDocLineLength briDoc = flip StateS.evalState False $ rec briDoc -- the state encodes whether a separator was already -- appended at the current position. where rec = \case BDEmpty -> return $ 0 BDLit t -> StateS.put False $> Text.length t BDSeq bds -> sum <$> rec `mapM` bds BDCols _ bds -> sum <$> rec `mapM` bds BDSeparator -> StateS.get >>= \b -> StateS.put True $> if b then 0 else 1 BDAddBaseY _ bd -> rec bd BDBaseYPushCur bd -> rec bd BDIndentLevelPushCur bd -> rec bd BDIndentLevelPop bd -> rec bd BDPar _ line _ -> rec line BDAlt{} -> error "briDocLineLength BDAlt" BDForceMultiline bd -> rec bd BDForceSingleline bd -> rec bd BDForwardLineMode bd -> rec bd BDExternal _ t -> return $ Text.length t BDPlain t -> return $ Text.length t BDQueueComments _ bd -> rec bd BDFlushCommentsPrior _ bd -> rec bd BDFlushCommentsPost _ bd -> rec bd BDLines ls@(_ : _) -> do x <- StateS.get return $ maximum $ ls <&> \l -> StateS.evalState (rec l) x BDEntryDelta _dp bd -> rec bd BDLines [] -> error "briDocLineLength BDLines []" BDEnsureIndent _ bd -> rec bd BDSetParSpacing bd -> rec bd BDForceParSpacing bd -> rec bd BDNonBottomSpacing _ bd -> rec bd BDDebug _ bd -> rec bd briDocIsMultiLine :: BriDoc -> Bool briDocIsMultiLine briDoc = rec briDoc where rec :: BriDoc -> Bool rec = \case BDEmpty -> False BDLit _ -> False BDSeq bds -> any rec bds BDCols _ bds -> any rec bds BDSeparator -> False BDAddBaseY _ bd -> rec bd BDBaseYPushCur bd -> rec bd BDIndentLevelPushCur bd -> rec bd BDIndentLevelPop bd -> rec bd BDPar{} -> True BDAlt{} -> error "briDocIsMultiLine BDAlt" BDForceMultiline _ -> True BDForceSingleline bd -> rec bd BDForwardLineMode bd -> rec bd BDExternal _ t | [_] <- Text.lines t -> False BDExternal{} -> True BDPlain t | [_] <- Text.lines t -> False BDPlain _ -> True BDQueueComments _ bd -> rec bd BDFlushCommentsPrior _ bd -> rec bd BDFlushCommentsPost _ bd -> rec bd BDEntryDelta _dp bd -> rec bd BDLines (_ : _ : _) -> True BDLines [_] -> False BDLines [] -> error "briDocIsMultiLine BDLines []" BDEnsureIndent _ bd -> rec bd BDSetParSpacing bd -> rec bd BDForceParSpacing bd -> rec bd BDNonBottomSpacing _ bd -> rec bd BDDebug _ bd -> rec bd briDocToColInfo :: Bool -> BriDoc -> StateS.State ColBuildState ColInfo briDocToColInfo lastFlag = \case BDCols sig list -> withAlloc lastFlag $ \ind -> do let isLastList = if lastFlag then (== length list) <$> [1 ..] else repeat False subInfos <- zip isLastList list `forM` uncurry briDocToColInfo let lengthInfos = zip (briDocLineLength <$> list) subInfos let trueSpacings = getTrueSpacings lengthInfos return $ (Seq.singleton trueSpacings, ColInfo ind sig lengthInfos) bd -> return $ ColInfoNo bd processInfoIgnore :: LayoutConstraints m => (BriDoc -> m ()) -> ColInfo -> m () processInfoIgnore layoutBriDocM = go where go = \case ColInfoStart -> error "should not happen (TM)" ColInfoNo doc -> layoutBriDocM doc ColInfo _ _ list -> list `forM_` (snd .> go) _prettyColInfos :: String -> ColInfo -> String _prettyColInfos prefix = \case ColInfoStart -> "start (?)" ColInfoNo bd -> "X" ++ replicate (briDocLineLength bd - 1) '_' ColInfo _ind sig below -> let (total, belowStrs) = List.mapAccumL (\x (add, info) -> (x + add, _prettyColInfos (prefix ++ replicate (x) ' ') info) ) 0 below in "X" ++ replicate (total - 1) 'x' ++ " as " ++ show sig ++ " " ++ show _ind ++ "\n" ++ prefix ++ List.concat belowStrs ++ "\n" ++ prefix ++ replicate total ' ' -- [ prefix ++ show k ++ ": " ++ prettyColInfos (prefix ++ " ") v -- | (k, v) <- below -- ]