diff -pur spack-src-new/clang/bindings/python/clang/cindex.py spack-src-newer/clang/bindings/python/clang/cindex.py --- spack-src-new/clang/bindings/python/clang/cindex.py 2025-02-03 19:31:25.893090799 +0100 +++ spack-src-newer/clang/bindings/python/clang/cindex.py 2025-02-03 19:37:00.122475192 +0100 @@ -571,7 +571,7 @@ class TokenGroup(object): conf.lib.clang_disposeTokens(self._tu, self._memory, self._count) @staticmethod - def get_tokens(tu, extent): + def get_tokens(tu, extent, options=0): """Helper method to return all tokens in an extent. This functionality is needed multiple places in this module. We define @@ -581,6 +581,7 @@ class TokenGroup(object): tokens_count = c_uint() - conf.lib.clang_tokenize(tu, extent, byref(tokens_memory), byref(tokens_count)) + conf.lib.clang_tokenizeRange( + tu, extent, byref(tokens_memory), byref(tokens_count), options) count = int(tokens_count.value) @@ -1991,13 +1992,16 @@ class Cursor(Structure): for descendant in child.walk_preorder(): yield descendant - def get_tokens(self): + def get_tokens(self, options=0): """Obtain Token instances formulating that compose this Cursor. This is a generator for Token instances. It returns all tokens which occupy the extent this cursor occupies. + + options is a bitwise or of TokenGroup.TOKENIZE_XXX flags which will + control tokenization behavior. """ - return TokenGroup.get_tokens(self._tu, self.extent) + return TokenGroup.get_tokens(self._tu, self.extent, options) def get_field_offsetof(self): """Returns the offsetof the FIELD_DECL pointed by this Cursor.""" @@ -3283,18 +3287,21 @@ class TranslationUnit(ClangObject): return CodeCompletionResults(ptr) return None - def get_tokens(self, locations=None, extent=None): + def get_tokens(self, locations=None, extent=None, options=0): """Obtain tokens in this translation unit. This is a generator for Token instances. The caller specifies a range of source code to obtain tokens for. 
The range can be specified as a 2-tuple of SourceLocation or as a SourceRange. If both are defined, behavior is undefined. + + options is a bitwise or of TokenGroup.TOKENIZE_XXX flags which will + control tokenization behavior. """ if locations is not None: extent = SourceRange(start=locations[0], end=locations[1]) - return TokenGroup.get_tokens(self, extent) + return TokenGroup.get_tokens(self, extent, options) class File(ClangObject): @@ -3796,6 +3804,11 @@ functionList = [ "clang_tokenize", [TranslationUnit, SourceRange, POINTER(POINTER(Token)), POINTER(c_uint)], ), + ( + "clang_tokenizeRange", + [TranslationUnit, SourceRange, POINTER(POINTER(Token)), POINTER(c_uint), + c_uint] + ), ("clang_visitChildren", [Cursor, callbacks["cursor_visit"], py_object], c_uint), ("clang_Cursor_getNumArguments", [Cursor], c_int), ("clang_Cursor_getArgument", [Cursor, c_uint], Cursor, Cursor.from_result), diff -pur spack-src-new/clang/bindings/python/tests/cindex/test_cursor.py spack-src-newer/clang/bindings/python/tests/cindex/test_cursor.py --- spack-src-new/clang/bindings/python/tests/cindex/test_cursor.py 2025-02-03 19:30:48.340935255 +0100 +++ spack-src-newer/clang/bindings/python/tests/cindex/test_cursor.py 2025-02-03 19:34:29.005849262 +0100 @@ -11,6 +11,7 @@ import unittest from clang.cindex import AvailabilityKind from clang.cindex import CursorKind from clang.cindex import TemplateArgumentKind +from clang.cindex import TokenGroup from clang.cindex import TranslationUnit from clang.cindex import TypeKind from .util import get_cursor @@ -608,6 +609,15 @@ class TestCursor(unittest.TestCase): self.assertEqual(tokens[0].spelling, "int") self.assertEqual(tokens[1].spelling, "foo") + def test_get_tokens_with_whitespace(self): + """Ensure whitespace tokens are returned when TOKENIZE_KEEP_WHITESPACE is passed.""" + source = 'class C { void f(); }\nvoid C::f() { }' + tu = get_tu(source) + + tokens = list(tu.cursor.get_tokens(TokenGroup.TOKENIZE_KEEP_WHITESPACE)) + self.assertEqual(''.join(t.spelling for t in tokens), source) + self.assertEqual(len(tokens), 27, 
[t.spelling for t in tokens]) + def test_get_token_cursor(self): """Ensure we can map tokens to cursors.""" tu = get_tu("class A {}; int foo(A var = A());", lang="cpp") diff -pur spack-src-new/clang/bindings/python/tests/cindex/test_cursor.py.orig spack-src-newer/clang/bindings/python/tests/cindex/test_cursor.py.orig --- spack-src-new/clang/bindings/python/tests/cindex/test_cursor.py.orig 2023-10-31 09:00:30.000000000 +0100 +++ spack-src-newer/clang/bindings/python/tests/cindex/test_cursor.py.orig 2025-02-03 19:30:48.340935255 +0100 @@ -432,6 +432,15 @@ class TestCursor(unittest.TestCase): underlying = typedef.underlying_typedef_type self.assertEqual(underlying.kind, TypeKind.INT) + def test_fully_qualified_type_name(self): + """Ensure a typedef's fully qualified name includes its enclosing namespace and struct.""" + source = 'namespace uiae { struct X { typedef int sometype; }; }' + tu = get_tu(source, lang='cpp') + + cls = get_cursor(tu, 'sometype') + fqn = cls.type.fully_qualified_name + self.assertTrue(fqn.endswith("uiae::X::sometype"), fqn) + def test_semantic_parent(self): tu = get_tu(kParentTest, "cpp") curs = get_cursors(tu, "f") diff -pur spack-src-new/clang/include/clang-c/Index.h spack-src-newer/clang/include/clang-c/Index.h --- spack-src-new/clang/include/clang-c/Index.h 2025-02-03 19:31:47.829181660 +0100 +++ spack-src-newer/clang/include/clang-c/Index.h 2025-02-03 19:37:22.846569316 +0100 @@ -34,7 +34,7 @@ * compatible, thus CINDEX_VERSION_MAJOR is expected to remain stable. */ #define CINDEX_VERSION_MAJOR 0 -#define CINDEX_VERSION_MINOR 65 +#define CINDEX_VERSION_MINOR 66 #define CINDEX_VERSION_ENCODE(major, minor) (((major)*10000) + ((minor)*1)) @@ -4762,6 +4762,28 @@ CINDEX_LINKAGE CXSourceLocation clang_ge */ CINDEX_LINKAGE CXSourceRange clang_getTokenExtent(CXTranslationUnit, CXToken); +typedef enum { + /** + * \brief Used to indicate that no special tokenization options are needed. + */ + CXTokenize_None = 0x0, + + /** + * \brief Used to indicate that tokens for whitespace should be returned. 
+ */ + CXTokenize_KeepWhitespace = 0x1 +} CXTokenize_Flags; + +/** + * \brief Tokenize the source code described by the given range into raw + * lexical tokens. + * + * \see clang_tokenizeRange + * + */ +CINDEX_LINKAGE void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range, + CXToken **Tokens, unsigned *NumTokens); + /** * Tokenize the source code described by the given range into raw * lexical tokens. @@ -4778,9 +4800,13 @@ CINDEX_LINKAGE CXSourceRange clang_getTo * \param NumTokens will be set to the number of tokens in the \c *Tokens * array. * + * \param options A bitmask of options that affects tokenization. This should be + * a bitwise OR of the CXTokenize_XXX flags. + * */ -CINDEX_LINKAGE void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range, - CXToken **Tokens, unsigned *NumTokens); +CINDEX_LINKAGE void clang_tokenizeRange(CXTranslationUnit TU, + CXSourceRange Range, CXToken **Tokens, + unsigned *NumTokens, unsigned options); /** * Annotate the given set of tokens by providing cursors for each token diff -pur spack-src-new/clang/tools/libclang/CIndex.cpp spack-src-newer/clang/tools/libclang/CIndex.cpp --- spack-src-new/clang/tools/libclang/CIndex.cpp 2023-10-31 09:00:30.000000000 +0100 +++ spack-src-newer/clang/tools/libclang/CIndex.cpp 2025-02-03 19:34:29.861852808 +0100 @@ -7199,7 +7199,7 @@ CXSourceRange clang_getTokenExtent(CXTra } static void getTokens(ASTUnit *CXXUnit, SourceRange Range, - SmallVectorImpl<CXToken> &CXTokens) { + SmallVectorImpl<CXToken> &CXTokens, unsigned options) { SourceManager &SourceMgr = CXXUnit->getSourceManager(); std::pair<FileID, unsigned> BeginLocInfo = SourceMgr.getDecomposedSpellingLoc(Range.getBegin()); @@ -7220,6 +7220,9 @@ static void getTokens(ASTUnit *CXXUnit, CXXUnit->getASTContext().getLangOpts(), Buffer.begin(), Buffer.data() + BeginLocInfo.second, Buffer.end()); Lex.SetCommentRetentionState(true); + if (options & CXTokenize_KeepWhitespace) { + Lex.SetKeepWhitespaceMode(true); + } // Lex tokens 
until we hit the end of the range. const char *EffectiveBufferEnd = Buffer.data() + EndLocInfo.second; @@ -7290,7 +7293,7 @@ CXToken *clang_getToken(CXTranslationUni SM.getComposedLoc(DecomposedEnd.first, DecomposedEnd.second); SmallVector<CXToken, 32> CXTokens; - getTokens(CXXUnit, SourceRange(Begin, End), CXTokens); + getTokens(CXXUnit, SourceRange(Begin, End), CXTokens, CXTokenize_None); if (CXTokens.empty()) return nullptr; @@ -7304,6 +7307,12 @@ CXToken *clang_getToken(CXTranslationUni void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range, CXToken **Tokens, unsigned *NumTokens) { + return clang_tokenizeRange(TU, Range, Tokens, NumTokens, CXTokenize_None); +} + +void clang_tokenizeRange(CXTranslationUnit TU, CXSourceRange Range, + CXToken **Tokens, unsigned *NumTokens, + unsigned options) { LOG_FUNC_SECTION { *Log << TU << ' ' << Range; } if (Tokens) @@ -7327,7 +7336,7 @@ void clang_tokenize(CXTranslationUnit TU return; SmallVector<CXToken, 32> CXTokens; - getTokens(CXXUnit, R, CXTokens); + getTokens(CXXUnit, R, CXTokens, options); if (CXTokens.empty()) return; Only in spack-src-newer/clang/tools/libclang: CIndex.cpp.orig diff -pur spack-src-new/clang/tools/libclang/libclang.map spack-src-newer/clang/tools/libclang/libclang.map --- spack-src-new/clang/tools/libclang/libclang.map 2025-02-03 19:30:50.280943291 +0100 +++ spack-src-newer/clang/tools/libclang/libclang.map 2025-02-03 19:34:29.861852808 +0100 @@ -399,6 +399,7 @@ LLVM_13 { clang_suspendTranslationUnit; clang_toggleCrashRecovery; clang_tokenize; + clang_tokenizeRange; clang_uninstall_llvm_fatal_error_handler; clang_visitChildren; clang_visitChildrenWithBlock; Only in spack-src-newer/clang/tools/libclang: libclang.map.orig