-- -- Copyright (c) 2013-2023 Nicola Bonelli -- -- This program is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 2 of the License, or -- (at your option) any later version. -- -- This program is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with this program; if not, write to the Free Software -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -- module CGrep.Strategy.Semantic (search) where import qualified Data.ByteString.Char8 as C import CGrep.Parser.Token import CGrep.ContextFilter ( contextBitComment, mkContextFilter, (~!) ) import CGrep.Common ( Text8, trim, getTargetName, getTargetContents, expandMultiline, ignoreCase, trim8, subText ) import CGrep.Search ( eligibleForSearch, searchStringIndices ) import CGrep.Output ( Output, mkOutputElements, runSearch ) import CGrep.Parser.Line ( getAllLineOffsets ) import CGrep.Parser.Atom ( Atom(..), mkAtomFromToken, combineAtoms, filterTokensWithAtoms) import Control.Monad.Trans.Reader ( reader, ask ) import Control.Monad.IO.Class ( MonadIO(liftIO) ) import Data.List ( sortBy, nub ) import Data.Function ( on ) import Data.Maybe ( mapMaybe ) import Reader ( ReaderIO, Env (..) 
)
import Verbose ( putMsgLnVerbose )
import Util ( rmQuote8 )
import CGrep.Parser.Chunk
import System.Posix.FilePath ( RawFilePath, takeBaseName )
import CGrep.FileType ( FileType )
import CGrep.FileTypeMap ( fileTypeLookup, FileTypeInfo, contextFilter )
import System.IO ( stderr )
import qualified Data.Sequence as S
import Data.Foldable ( Foldable(toList) )
import Data.Coerce ( coerce )


-- | Semantic search strategy.
--
-- Reads the contents of the target, tokenizes every pattern into atoms,
-- tokenizes the (pre-processed) source text, keeps the source tokens that
-- 'filterTokensWithAtoms' selects for any pattern, and hands them to
-- 'mkOutputElements'.
--
-- Arguments:
--
--   * @info@ - optional file type and its 'FileTypeInfo'; only the
--     'FileTypeInfo' half is used here, as an argument to 'parseTokens'.
--   * @f@    - the target to read (via 'getTargetContents').
--   * @ps@   - the patterns to look for.
search :: Maybe (FileType, FileTypeInfo) -> RawFilePath -> [Text8] -> ReaderIO [Output]
search info f ps = do
    Env{..} <- ask
    text <- liftIO $ getTargetContents f

    let filename = getTargetName f
        typeInfo = snd <$> info

        -- Context filter with 'contextBitComment' taken out through '(~!)'
        -- (presumably so that comments are not searched; confirm against
        -- CGrep.ContextFilter). The same filter is applied to the source
        -- text and to every pattern.
        filt         = mkContextFilter opt ~! contextBitComment
        applyContext = contextFilter (fst <$> fileTypeLookup opt filename) filt True

        -- Views of the source, each one derived from the previous:
        -- 'textCased' feeds the plain string search, 'textFinal' is what
        -- gets tokenized.
        textCased = ignoreCase opt text
        textFinal = expandMultiline opt (applyContext textCased)

        -- Patterns are tokenized with each of the token kinds below enabled.
        patternFilter = TokenFilter
            { tfIdentifier = True
            , tfKeyword    = True
            , tfNativeType = True
            , tfString     = True
            , tfNumber     = True
            , tfOperator   = True
            , tfBracket    = True
            }

        patternTokens = [ parseTokens patternFilter typeInfo (applyContext p) | p <- ps ]
        patternAtoms  = map (fmap mkAtomFromToken) patternTokens
        atomGroups    = [ combineAtoms [ [a] | a <- toList atoms ] | atoms <- patternAtoms ]

        -- Plain text to search for, derived from one atom. Clause order
        -- matters: string chunks first, then the identifier "OR" (dropped),
        -- then every other raw token (its 'tToken'); non-raw atoms
        -- contribute nothing.
        needleOf (Raw (Token (Chunk ChunkString xs _)))       = Just (rmQuote8 $ trim8 xs)
        needleOf (Raw (Token (Chunk ChunkIdentifier "OR" _))) = Nothing
        needleOf (Raw t)                                      = Just (tToken t)
        needleOf _                                            = Nothing

        identifiers = mapMaybe needleOf (concatMap toList patternAtoms)

    -- put banners...
    putMsgLnVerbose 2 stderr $ "strategy : running generic semantic search on " <> filename <> "..."
    putMsgLnVerbose 2 stderr $ "atoms : " <> show atomGroups <> " -> identifiers: " <> show identifiers
    putMsgLnVerbose 3 stderr $ "---\n" <> textFinal <> "\n---"

    let hits = searchStringIndices identifiers textCased

    runSearch opt filename (eligibleForSearch identifiers hits) $ do

        -- Tokenize the source, restricted by 'subText' to the string hits and
        -- by a token filter built from the chunk types found in the patterns.
        let sourceFilter = mkTokenFilter (map (cTyp . coerce) (concatMap toList patternTokens))
            tokens       = toList (parseTokens sourceFilter typeInfo (subText hits textFinal))

            -- Collect the tokens selected for each pattern, drop duplicates,
            -- and order the result by offset.
            candidates = [ tok | atoms <- atomGroups, tok <- filterTokensWithAtoms opt atoms tokens ]
            ordered    = sortBy (compare `on` tOffset) (nub candidates)

            -- convert Tokens to Chunks
            matches = coerce ordered :: [Chunk]

        putMsgLnVerbose 2 stderr $ "tokens : " <> show tokens
        putMsgLnVerbose 2 stderr $ "matches : " <> show matches

        -- Line offsets are computed on the original, unprocessed text.
        let lineOffsets = getAllLineOffsets text

        mkOutputElements lineOffsets filename text textFinal matches