Skip to content
Snippets Groups Projects
llvm17-0003-libclang-Add-option-to-keep-whitespace-when-tokenizi.patch 10.4 KiB
Newer Older
diff -pur spack-src-new/clang/bindings/python/clang/cindex.py spack-src-newer/clang/bindings/python/clang/cindex.py
--- spack-src-new/clang/bindings/python/clang/cindex.py	2025-02-03 19:31:25.893090799 +0100
+++ spack-src-newer/clang/bindings/python/clang/cindex.py	2025-02-03 19:37:00.122475192 +0100
@@ -571,7 +571,13 @@ class TokenGroup(object):
         conf.lib.clang_disposeTokens(self._tu, self._memory, self._count)
 
+    # Default tokenization mode.
+    TOKENIZE_NONE = 0
+
+    # Used to indicate that tokens for whitespace should be returned.
+    TOKENIZE_KEEP_WHITESPACE = 1
+
     @staticmethod
-    def get_tokens(tu, extent):
+    def get_tokens(tu, extent, options=0):
         """Helper method to return all tokens in an extent.
 
         This functionality is needed multiple places in this module. We define
@@ -581,6 +581,7 @@ class TokenGroup(object):
         tokens_count = c_uint()
 
-        conf.lib.clang_tokenize(tu, extent, byref(tokens_memory), byref(tokens_count))
+        conf.lib.clang_tokenizeRange(
+            tu, extent, byref(tokens_memory), byref(tokens_count), options)
 
         count = int(tokens_count.value)
 
@@ -1991,13 +1993,16 @@ class Cursor(Structure):
             for descendant in child.walk_preorder():
                 yield descendant
 
-    def get_tokens(self):
+    def get_tokens(self, options=0):
         """Obtain Token instances formulating that compose this Cursor.
 
         This is a generator for Token instances. It returns all tokens which
         occupy the extent this cursor occupies.
+
+        options is a bitwise or of TokenGroup.TOKENIZE_XXX flags which will
+        control tokenization behavior.
         """
-        return TokenGroup.get_tokens(self._tu, self.extent)
+        return TokenGroup.get_tokens(self._tu, self.extent, options)
 
     def get_field_offsetof(self):
         """Returns the offsetof the FIELD_DECL pointed by this Cursor."""
@@ -3283,18 +3288,21 @@ class TranslationUnit(ClangObject):
             return CodeCompletionResults(ptr)
         return None
 
-    def get_tokens(self, locations=None, extent=None):
+    def get_tokens(self, locations=None, extent=None, options=0):
         """Obtain tokens in this translation unit.
 
         This is a generator for Token instances. The caller specifies a range
         of source code to obtain tokens for. The range can be specified as a
         2-tuple of SourceLocation or as a SourceRange. If both are defined,
         behavior is undefined.
+
+        options is a bitwise or of TokenGroup.TOKENIZE_XXX flags which will
+        control tokenization behavior.
         """
         if locations is not None:
             extent = SourceRange(start=locations[0], end=locations[1])
 
-        return TokenGroup.get_tokens(self, extent)
+        return TokenGroup.get_tokens(self, extent, options)
 
 
 class File(ClangObject):
@@ -3796,6 +3804,11 @@ functionList = [
         "clang_tokenize",
         [TranslationUnit, SourceRange, POINTER(POINTER(Token)), POINTER(c_uint)],
     ),
+    (
+        "clang_tokenizeRange",
+        [TranslationUnit, SourceRange, POINTER(POINTER(Token)), POINTER(c_uint),
+        c_uint]
+    ),
     ("clang_visitChildren", [Cursor, callbacks["cursor_visit"], py_object], c_uint),
     ("clang_Cursor_getNumArguments", [Cursor], c_int),
     ("clang_Cursor_getArgument", [Cursor, c_uint], Cursor, Cursor.from_result),
diff -pur spack-src-new/clang/bindings/python/tests/cindex/test_cursor.py spack-src-newer/clang/bindings/python/tests/cindex/test_cursor.py
--- spack-src-new/clang/bindings/python/tests/cindex/test_cursor.py	2025-02-03 19:30:48.340935255 +0100
+++ spack-src-newer/clang/bindings/python/tests/cindex/test_cursor.py	2025-02-03 19:34:29.005849262 +0100
@@ -11,6 +11,7 @@ import unittest
 from clang.cindex import AvailabilityKind
 from clang.cindex import CursorKind
 from clang.cindex import TemplateArgumentKind
+from clang.cindex import TokenGroup
 from clang.cindex import TranslationUnit
 from clang.cindex import TypeKind
 from .util import get_cursor
@@ -608,6 +609,14 @@ class TestCursor(unittest.TestCase):
         self.assertEqual(tokens[0].spelling, "int")
         self.assertEqual(tokens[1].spelling, "foo")
 
+    def test_get_tokens_with_whitespace(self):
+        source = 'class C { void f(); }\nvoid C::f() { }'
+        tu = get_tu(source)
+
+        tokens = list(tu.cursor.get_tokens(TokenGroup.TOKENIZE_KEEP_WHITESPACE))
+        self.assertEqual(''.join(t.spelling for t in tokens), source)
+        self.assertEqual(len(tokens), 27, [t.spelling for t in tokens])
+
     def test_get_token_cursor(self):
         """Ensure we can map tokens to cursors."""
         tu = get_tu("class A {}; int foo(A var = A());", lang="cpp")
     def test_get_token_cursor(self):
         """Ensure we can map tokens to cursors."""
         tu = get_tu("class A {}; int foo(A var = A());", lang="cpp")
diff -pur spack-src-new/clang/bindings/python/tests/cindex/test_cursor.py.orig spack-src-newer/clang/bindings/python/tests/cindex/test_cursor.py.orig
--- spack-src-new/clang/bindings/python/tests/cindex/test_cursor.py.orig	2023-10-31 09:00:30.000000000 +0100
+++ spack-src-newer/clang/bindings/python/tests/cindex/test_cursor.py.orig	2025-02-03 19:30:48.340935255 +0100
@@ -432,6 +432,14 @@ class TestCursor(unittest.TestCase):
         underlying = typedef.underlying_typedef_type
         self.assertEqual(underlying.kind, TypeKind.INT)
 
+    def test_fully_qualified_type_name(self):
+        source = 'namespace uiae { struct X { typedef int sometype; }; }'
+        tu = get_tu(source, lang='cpp')
+
+        cls = get_cursor(tu, 'sometype')
+        fqn = cls.type.fully_qualified_name
+        self.assertTrue(fqn.endswith("uiae::X::sometype"), fqn)
+
     def test_semantic_parent(self):
         tu = get_tu(kParentTest, "cpp")
     def test_semantic_parent(self):
         tu = get_tu(kParentTest, "cpp")
         curs = get_cursors(tu, "f")
diff -pur spack-src-new/clang/include/clang-c/Index.h spack-src-newer/clang/include/clang-c/Index.h
--- spack-src-new/clang/include/clang-c/Index.h	2025-02-03 19:31:47.829181660 +0100
+++ spack-src-newer/clang/include/clang-c/Index.h	2025-02-03 19:37:22.846569316 +0100
@@ -34,7 +34,7 @@
  * compatible, thus CINDEX_VERSION_MAJOR is expected to remain stable.
  */
 #define CINDEX_VERSION_MAJOR 0
-#define CINDEX_VERSION_MINOR 65
+#define CINDEX_VERSION_MINOR 66
 
 #define CINDEX_VERSION_ENCODE(major, minor) (((major)*10000) + ((minor)*1))
 
@@ -4762,6 +4762,28 @@ CINDEX_LINKAGE CXSourceLocation clang_ge
  */
 CINDEX_LINKAGE CXSourceRange clang_getTokenExtent(CXTranslationUnit, CXToken);
 
+typedef enum {
+  /**
+   * \brief Used to indicate that no special tokenization options are needed.
+   */
+  CXTokenize_None = 0x0,
+
+  /**
+   * \brief Used to indicate that tokens for whitespace should be returned.
+   */
+  CXTokenize_KeepWhitespace = 0x1
+} CXTokenize_Flags;
+
+/**
+ * \brief Tokenize the source code described by the given range into raw
+ * lexical tokens.
+ *
+ * \see clang_tokenizeRange
+ *
+ */
+CINDEX_LINKAGE void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range,
+                                   CXToken **Tokens, unsigned *NumTokens);
+
 /**
  * Tokenize the source code described by the given range into raw
  * lexical tokens.
@@ -4778,9 +4800,13 @@ CINDEX_LINKAGE CXSourceRange clang_getTo
  * \param NumTokens will be set to the number of tokens in the \c *Tokens
  * array.
  *
+ * \param options A bitmask of options that affects tokenization. This should be
+ * a bitwise OR of the CXTokenize_XXX flags.
+ *
  */
-CINDEX_LINKAGE void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range,
-                                   CXToken **Tokens, unsigned *NumTokens);
+CINDEX_LINKAGE void clang_tokenizeRange(CXTranslationUnit TU,
+                                        CXSourceRange Range, CXToken **Tokens,
+                                        unsigned *NumTokens, unsigned options);
 
 /**
  * Annotate the given set of tokens by providing cursors for each token
diff -pur spack-src-new/clang/tools/libclang/CIndex.cpp spack-src-newer/clang/tools/libclang/CIndex.cpp
--- spack-src-new/clang/tools/libclang/CIndex.cpp	2023-10-31 09:00:30.000000000 +0100
+++ spack-src-newer/clang/tools/libclang/CIndex.cpp	2025-02-03 19:34:29.861852808 +0100
@@ -7199,7 +7199,7 @@ CXSourceRange clang_getTokenExtent(CXTra
 }
 
 static void getTokens(ASTUnit *CXXUnit, SourceRange Range,
-                      SmallVectorImpl<CXToken> &CXTokens) {
+                      SmallVectorImpl<CXToken> &CXTokens, unsigned options) {
   SourceManager &SourceMgr = CXXUnit->getSourceManager();
   std::pair<FileID, unsigned> BeginLocInfo =
       SourceMgr.getDecomposedSpellingLoc(Range.getBegin());
@@ -7220,6 +7220,9 @@ static void getTokens(ASTUnit *CXXUnit,
             CXXUnit->getASTContext().getLangOpts(), Buffer.begin(),
             Buffer.data() + BeginLocInfo.second, Buffer.end());
   Lex.SetCommentRetentionState(true);
+  if (options & CXTokenize_KeepWhitespace) {
+    Lex.SetKeepWhitespaceMode(true);
+  }
 
   // Lex tokens until we hit the end of the range.
   const char *EffectiveBufferEnd = Buffer.data() + EndLocInfo.second;
@@ -7290,7 +7293,7 @@ CXToken *clang_getToken(CXTranslationUni
       SM.getComposedLoc(DecomposedEnd.first, DecomposedEnd.second);
 
   SmallVector<CXToken, 32> CXTokens;
-  getTokens(CXXUnit, SourceRange(Begin, End), CXTokens);
+  getTokens(CXXUnit, SourceRange(Begin, End), CXTokens, CXTokenize_None);
 
   if (CXTokens.empty())
     return nullptr;
@@ -7304,6 +7307,12 @@ CXToken *clang_getToken(CXTranslationUni
 
 void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range, CXToken **Tokens,
                     unsigned *NumTokens) {
+  return clang_tokenizeRange(TU, Range, Tokens, NumTokens, CXTokenize_None);
+}
+
+void clang_tokenizeRange(CXTranslationUnit TU, CXSourceRange Range,
+                         CXToken **Tokens, unsigned *NumTokens,
+                         unsigned options) {
   LOG_FUNC_SECTION { *Log << TU << ' ' << Range; }
 
   if (Tokens)
@@ -7327,7 +7336,7 @@ void clang_tokenize(CXTranslationUnit TU
     return;
 
   SmallVector<CXToken, 32> CXTokens;
-  getTokens(CXXUnit, R, CXTokens);
+  getTokens(CXXUnit, R, CXTokens, options);
 
   if (CXTokens.empty())
     return;
Only in spack-src-newer/clang/tools/libclang: CIndex.cpp.orig
diff -pur spack-src-new/clang/tools/libclang/libclang.map spack-src-newer/clang/tools/libclang/libclang.map
--- spack-src-new/clang/tools/libclang/libclang.map	2025-02-03 19:30:50.280943291 +0100
+++ spack-src-newer/clang/tools/libclang/libclang.map	2025-02-03 19:34:29.861852808 +0100
@@ -399,6 +399,7 @@ LLVM_13 {
     clang_suspendTranslationUnit;
     clang_toggleCrashRecovery;
     clang_tokenize;
+    clang_tokenizeRange;
     clang_uninstall_llvm_fatal_error_handler;
     clang_visitChildren;
     clang_visitChildrenWithBlock;
Only in spack-src-newer/clang/tools/libclang: libclang.map.orig