实现了SyntaxHighlighter

2025-07-09 10:54:37 +00:00 · 2024-12-14 23:38:20 +08:00 · 2024-12-14 23:38:20 +08:00 · 22617d26ce
commit 22617d26ce
parent fafb31ac7f
7 changed files with 237 additions and 48 deletions
--- a/components/Text.h
+++ b/components/Text.h
@ -53,11 +53,9 @@ public:
                // printf("Add: %s\n", part.text.c_str());
            }
        }
-        if(line.length() > 0) {
-            lines_.push_back(line);
-            if(maxLineWidth_ < line.length()) {
-                maxLineWidth_ = line.length();
-            }
+        lines_.push_back(line);
+        if(maxLineWidth_ < line.length()) {
+            maxLineWidth_ = line.length();
        }
    }

--- a/data/highlight/cpp/keywords.txt
+++ b/data/highlight/cpp/keywords.txt
@ -28,6 +28,7 @@ if
 inline
 int
 long
+include
 mutable
 namespace
 new
@ -60,6 +61,8 @@ virtual
 void
 volatile
 wchar_t
+while
+for

 //some useful keywords
 std
--- a/data/highlight/cpp/rules.txt
+++ b/data/highlight/cpp/rules.txt
@ -0,0 +1,6 @@
+TOKEN_TYPE_COMMENT:2
+TOKEN_TYPE_RESERVE_WORD:9
+TOKEN_TYPE_STRING:10
+TOKEN_TYPE_NUMBER:11
+TOKEN_TYPE_IDENTIFIER:7
+TOKEN_TYPE_OPERATOR_OR_DELIMITER:13
--- a/utils/LexicalAnalysis.cpp
+++ b/utils/LexicalAnalysis.cpp
@ -24,21 +24,21 @@ else  j=b-a;\n\
    MyVector<Token> tokens = lexicalAnalysis.tokenize();
    std::cout << "Tokenized text: " << std::endl;
    for (int i = 0; i < tokens.size(); i++) {
-        if(tokens[i].type == TokenType::TOKEN_TYPE_IDENTIFIER) {
+        if(tokens[i].type == CodeTokenType::TOKEN_TYPE_IDENTIFIER) {
            std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Identifier: ";
-        } else if(tokens[i].type == TokenType::TOKEN_TYPE_NUMBER) {
+        } else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_NUMBER) {
            std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Number: ";
-        } else if(tokens[i].type == TokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER) {
+        } else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER) {
            std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Operator or Delimiter: ";
-        } else if(tokens[i].type == TokenType::TOKEN_TYPE_STRING) {
+        } else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_STRING) {
            std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "String: ";
-        } else if(tokens[i].type == TokenType::TOKEN_TYPE_RESERVE_WORD) {
+        } else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_RESERVE_WORD) {
            std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Reserve word: ";
-        } else if(tokens[i].type == TokenType::TOKEN_TYPE_EOF) {
+        } else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_EOF) {
            std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "End of file.";
-        } else if(tokens[i].type == TokenType::TOKEN_TYPE_COMMENT) {
+        } else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_COMMENT) {
            std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Comment: ";
-        } else if(tokens[i].type == TokenType::TOKEN_TYPE_NEWLINE) {
+        } else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_NEWLINE) {
            std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Newline  ";
        } else {
            std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Unknown token: ";
--- a/utils/LexicalAnalysis.h
+++ b/utils/LexicalAnalysis.h
@ -6,7 +6,7 @@
 #include <fstream>
 #include "../mystl/my_vector.h"

-enum class TokenType {
+enum class CodeTokenType {
    TOKEN_TYPE_IDENTIFIER = 9999,
    TOKEN_TYPE_NUMBER = 10000,
    TOKEN_TYPE_STRING = 10001,
@ -15,15 +15,70 @@ enum class TokenType {
    TOKEN_TYPE_EOF,
    TOKEN_TYPE_UNDEFINED,
    TOKEN_TYPE_COMMENT,
-    TOKEN_TYPE_NEWLINE
+    TOKEN_TYPE_NEWLINE,
+    TOKEN_TYPE_SPACE
 };

+// Helpers for converting between CodeTokenType values and their textual names.
+
+/// Returns the enumerator name of `type` as text, e.g. "TOKEN_TYPE_NUMBER".
+/// Any value that matches none of the listed enumerators is reported as
+/// "TOKEN_TYPE_UNDEFINED".
+///
+/// Declared `inline` because the definition lives in a header: without it,
+/// every translation unit that includes this header would emit its own
+/// external definition and the program would fail to link.
+inline std::string CodeTokenTypeToString(CodeTokenType type) {
+    switch(type) {
+        case CodeTokenType::TOKEN_TYPE_IDENTIFIER:
+            return "TOKEN_TYPE_IDENTIFIER";
+        case CodeTokenType::TOKEN_TYPE_NUMBER:
+            return "TOKEN_TYPE_NUMBER";
+        case CodeTokenType::TOKEN_TYPE_STRING:
+            return "TOKEN_TYPE_STRING";
+        case CodeTokenType::TOKEN_TYPE_RESERVE_WORD:
+            return "TOKEN_TYPE_RESERVE_WORD";
+        case CodeTokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER:
+            return "TOKEN_TYPE_OPERATOR_OR_DELIMITER";
+        case CodeTokenType::TOKEN_TYPE_EOF:
+            return "TOKEN_TYPE_EOF";
+        case CodeTokenType::TOKEN_TYPE_UNDEFINED:
+            return "TOKEN_TYPE_UNDEFINED";
+        case CodeTokenType::TOKEN_TYPE_COMMENT:
+            return "TOKEN_TYPE_COMMENT";
+        case CodeTokenType::TOKEN_TYPE_NEWLINE:
+            return "TOKEN_TYPE_NEWLINE";
+        case CodeTokenType::TOKEN_TYPE_SPACE:
+            return "TOKEN_TYPE_SPACE";
+        default:
+            // Out-of-range values are treated the same as UNDEFINED.
+            return "TOKEN_TYPE_UNDEFINED";
+    }
+}
+
+/// Parses an enumerator name such as "TOKEN_TYPE_COMMENT" into its
+/// CodeTokenType value. The comparison is exact (case-sensitive, no trimming);
+/// any unrecognised text yields CodeTokenType::TOKEN_TYPE_UNDEFINED.
+///
+/// Declared `inline` because the definition lives in a header, and takes the
+/// name by const reference so callers do not pay for a string copy.
+inline CodeTokenType stringToCodeTokenType(const std::string& str) {
+    if(str == "TOKEN_TYPE_IDENTIFIER") {
+        return CodeTokenType::TOKEN_TYPE_IDENTIFIER;
+    } else if(str == "TOKEN_TYPE_NUMBER") {
+        return CodeTokenType::TOKEN_TYPE_NUMBER;
+    } else if(str == "TOKEN_TYPE_STRING") {
+        return CodeTokenType::TOKEN_TYPE_STRING;
+    } else if(str == "TOKEN_TYPE_RESERVE_WORD") {
+        return CodeTokenType::TOKEN_TYPE_RESERVE_WORD;
+    } else if(str == "TOKEN_TYPE_OPERATOR_OR_DELIMITER") {
+        return CodeTokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER;
+    } else if(str == "TOKEN_TYPE_EOF") {
+        return CodeTokenType::TOKEN_TYPE_EOF;
+    } else if(str == "TOKEN_TYPE_UNDEFINED") {
+        return CodeTokenType::TOKEN_TYPE_UNDEFINED;
+    } else if(str == "TOKEN_TYPE_COMMENT") {
+        return CodeTokenType::TOKEN_TYPE_COMMENT;
+    } else if(str == "TOKEN_TYPE_NEWLINE") {
+        return CodeTokenType::TOKEN_TYPE_NEWLINE;
+    } else if(str == "TOKEN_TYPE_SPACE") {
+        return CodeTokenType::TOKEN_TYPE_SPACE;
+    } else {
+        // Unknown names fall back to UNDEFINED rather than failing.
+        return CodeTokenType::TOKEN_TYPE_UNDEFINED;
+    }
+}
+
 struct Token {
    std::string value;
-    TokenType type;
+    CodeTokenType type;

    Token() {}
-    Token(std::string value, TokenType type) {
+    Token(std::string value, CodeTokenType type) {
        this->value = value;
        this->type = type;
    }
@ -120,8 +175,12 @@ private:
    void Scan(int & currentIndex) {
        currentToken = "";

-        while(preprocessedText[currentIndex] == ' ') {
-            currentToken += preprocessedText[currentIndex++];
+        if(preprocessedText[currentIndex] == ' ' || preprocessedText[currentIndex] == '\n' || preprocessedText[currentIndex] == '\t') {
+            while(preprocessedText[currentIndex] == ' ' || preprocessedText[currentIndex] == '\n' || preprocessedText[currentIndex] == '\t') {
+                currentToken += preprocessedText[currentIndex++];
+            }
+            syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_SPACE);
+            return;
        }
        
        // printf("current letter: [%d]%c\n", currentIndex, preprocessedText[currentIndex]);
@ -132,7 +191,7 @@ private:
            }

            syn = searchReserveWord(currentToken);
-            syn = syn == -1 ? static_cast<int>(TokenType::TOKEN_TYPE_IDENTIFIER) : syn;
+            syn = syn == -1 ? static_cast<int>(CodeTokenType::TOKEN_TYPE_IDENTIFIER) : syn;
            
            return;
        } else if(isDigit(preprocessedText[currentIndex])) {
@ -140,7 +199,7 @@ private:
                currentToken += preprocessedText[currentIndex++];
            }

-            syn = static_cast<int>(TokenType::TOKEN_TYPE_NUMBER);
+            syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_NUMBER);

            return;
        } else if((isOperatorOrDelimiter(std::string(1, preprocessedText[currentIndex])) != -1) && 
@ -151,7 +210,7 @@ private:
                    currentToken += preprocessedText[currentIndex++];
                }
                currentToken += preprocessedText[currentIndex++];
-                syn = static_cast<int>(TokenType::TOKEN_TYPE_STRING);
+                syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_STRING);
                return;
            }
            if(preprocessedText[currentIndex] == '\'') {
@ -160,7 +219,7 @@ private:
                    currentToken += preprocessedText[currentIndex++];
                }
                currentToken += preprocessedText[currentIndex++];
-                syn = static_cast<int>(TokenType::TOKEN_TYPE_STRING);
+                syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_STRING);
                return;
            }
            currentToken += preprocessedText[currentIndex++];
@ -182,7 +241,7 @@ private:
                currentToken += preprocessedText[currentIndex];
                currentToken += preprocessedText[currentIndex + 1];
                if(currentToken == "//") {
-                    syn = static_cast<int>(TokenType::TOKEN_TYPE_COMMENT);
+                    syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_COMMENT);
                    currentIndex += 2;
                    while(preprocessedText[currentIndex] != '\n' && currentIndex < preprocessedText.size()) {
                        currentToken += preprocessedText[currentIndex++];
@ -190,7 +249,7 @@ private:
                    return;
                }
                if(currentToken == "/*") {
-                    syn = static_cast<int>(TokenType::TOKEN_TYPE_COMMENT);
+                    syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_COMMENT);
                    currentIndex += 2;
                    while(currentIndex < rawText.size() - 1 && !(preprocessedText[currentIndex] == '*' && preprocessedText[currentIndex + 1] == '/')) {
                        currentToken += preprocessedText[currentIndex++];
@ -213,16 +272,16 @@ private:
            }
            return;
        } else if (preprocessedText[currentIndex] == '\0' || currentIndex >= preprocessedText.size()) {
-            syn = static_cast<int>(TokenType::TOKEN_TYPE_EOF);
+            syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_EOF);
            currentIndex++;
            return;
        } else if(preprocessedText[currentIndex] == '\n') {
-            syn = static_cast<int>(TokenType::TOKEN_TYPE_NEWLINE);
+            syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_NEWLINE);
            currentToken = "\n";
            currentIndex++;
            return;
        } else {
-            syn = static_cast<int>(TokenType::TOKEN_TYPE_UNDEFINED);
+            syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_UNDEFINED);
            currentIndex++;
            return;
        }
@ -263,39 +322,39 @@ public:
        syn = -1;
        int currentIndex = 0;
        tokens.clear();
-        while(syn != static_cast<int>(TokenType::TOKEN_TYPE_EOF) && syn != static_cast<int>(TokenType::TOKEN_TYPE_UNDEFINED)) {
+        while(syn != static_cast<int>(CodeTokenType::TOKEN_TYPE_EOF) && syn != static_cast<int>(CodeTokenType::TOKEN_TYPE_UNDEFINED)) {
            Scan(currentIndex);
-            printf("currentToken: [%s]\n", currentToken.c_str());
-            if(syn == static_cast<int>(TokenType::TOKEN_TYPE_STRING)) {
-                tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_STRING));
+            // printf("currentToken: [%s]\n", currentToken.c_str());
+            if(syn == static_cast<int>(CodeTokenType::TOKEN_TYPE_STRING)) {
+                tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_STRING));
                // printf("string: %s\n", currentToken.c_str());
-            } else if(syn == static_cast<int>(TokenType::TOKEN_TYPE_IDENTIFIER)) {
-                tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_IDENTIFIER));
+            } else if(syn == static_cast<int>(CodeTokenType::TOKEN_TYPE_IDENTIFIER)) {
+                tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_IDENTIFIER));
                // printf("identifier: %s\n", currentToken.c_str());
-            } else if(syn == static_cast<int>(TokenType::TOKEN_TYPE_NUMBER)) {
-                tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_NUMBER));
+            } else if(syn == static_cast<int>(CodeTokenType::TOKEN_TYPE_NUMBER)) {
+                tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_NUMBER));
                // printf("number: %s\n", currentToken.c_str());
            } else if(syn > 0 && syn < reserveWordCount) {
-                tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_RESERVE_WORD));
+                tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_RESERVE_WORD));
                // printf("reserve word: %s\n", currentToken.c_str());
            } else if(syn >= reserveWordCount && syn < reserveWordCount + operatorAndDelimiterCount) {
-                tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER));
+                tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER));
                // printf("operator or delimiter: %s\n", currentToken.c_str());
-            } else if(syn == static_cast<int>(TokenType::TOKEN_TYPE_COMMENT)) {
-                tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_COMMENT));
+            } else if(syn == static_cast<int>(CodeTokenType::TOKEN_TYPE_COMMENT)) {
+                tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_COMMENT));
                // printf("comment: %s\n", currentToken.c_str());
-            } else if(syn == static_cast<int>(TokenType::TOKEN_TYPE_EOF)) {
-                tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_EOF));
+            } else if(syn == static_cast<int>(CodeTokenType::TOKEN_TYPE_EOF)) {
+                tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_EOF));
                // printf("EOF: %s\n", currentToken.c_str());
-            } else if(syn == static_cast<int>(TokenType::TOKEN_TYPE_NEWLINE)) {
-                tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_NEWLINE));
+            } else if(syn == static_cast<int>(CodeTokenType::TOKEN_TYPE_NEWLINE)) {
+                tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_NEWLINE));
                // printf("newline: %s\n", currentToken.c_str());
            } else {
-                tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_UNDEFINED));
+                tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_UNDEFINED));
                // printf("undefined: %s\n", currentToken.c_str());
            }
            if(currentIndex >= preprocessedText.length()) {
-                syn = static_cast<int>(TokenType::TOKEN_TYPE_EOF);
+                syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_EOF);
            }
        }
        return tokens;
--- a/utils/SyntaxHighlighter.h
+++ b/utils/SyntaxHighlighter.h
@ -4,12 +4,59 @@
 #include "Color.h"
 #include "RichText.h"
 #include <string>
+#include "../mystl/my_vector.h"
+#include <fstream>
+#include <stdexcept>
+#include "LexicalAnalysis.h"
+
+// One highlighting rule: pairs a lexer token category with the color used
+// when text of that category is rendered.
+struct HighlightRule {
+    CodeTokenType type;  // token category this rule applies to
+    MColor color;        // color given to RichText for tokens of that category
+};

 class SyntaxHighlighter {
 private:
     std::string ruleName;
+    MyVector<HighlightRule> rules;
 
+public:
+    /// Loads the highlight rules for `ruleName` from
+    /// "../data/highlight/<ruleName>/rules.txt".
+    ///
+    /// Each usable line has the form "TOKEN_TYPE_NAME:color", where the color
+    /// is an integer converted to MColor. Lines without a ':' separator or
+    /// without a parsable integer color are skipped, so a blank or damaged
+    /// line cannot abort construction. If the file cannot be opened the rule
+    /// list stays empty and highlight() renders every token in COLOR_WHITE.
+    SyntaxHighlighter(std::string ruleName) {
+        this->ruleName = ruleName;
+        std::ifstream file("../data/highlight/" + ruleName + "/rules.txt"); // Just for unit test
+        // std::ifstream file("./data/highlight/" + ruleName + "/keywords.txt");
+        if (file.is_open()) {
+            std::string line;
+            while (std::getline(file, line)) {
+                const std::string::size_type separator = line.find(':');
+                if (separator == std::string::npos) {
+                    continue; // blank line or no "name:color" separator
+                }
+                int colorValue = 0;
+                try {
+                    colorValue = std::stoi(line.substr(separator + 1));
+                } catch (const std::logic_error&) {
+                    // std::stoi reports both non-numeric and out-of-range
+                    // input through std::logic_error subclasses.
+                    continue;
+                }
+                const CodeTokenType type = stringToCodeTokenType(line.substr(0, separator));
+                rules.push_back(HighlightRule{type, static_cast<MColor>(colorValue)});
+            }
+            file.close();
+        }
+    }
 
+    /// Tokenizes `text` with a LexicalAnalysis built for this rule set and
+    /// returns the tokens concatenated into a RichText, each colored by the
+    /// first rule whose type matches the token. Tokens that match no rule
+    /// are emitted in COLOR_WHITE.
+    RichText highlight(std::string text) {
+        LexicalAnalysis lexicalAnalysis(this->ruleName);
+        lexicalAnalysis.setRawText(text);
+        MyVector<Token> tokens = lexicalAnalysis.tokenize();
+        RichText richText;
+        for (int i = 0; i < tokens.size(); i++) {
+            const Token& token = tokens[i];
+            bool hasMatched = false;
+            for (int j = 0; j < rules.size(); j++) {
+                const HighlightRule& rule = rules[j];
+                if (token.type == rule.type) {
+                    hasMatched = true;
+                    richText += RichText(token.value, rule.color);
+                    // Stop at the first match: continuing would append the
+                    // same token again for every duplicate rule of this type.
+                    break;
+                }
+            }
+            if (!hasMatched) {
+                richText += RichText(token.value, COLOR_WHITE);
+            }
+        }
+        return richText;
+    }
 };

 #endif // SYNTAX_HIGHLIGHTER_H
--- a/utils/SyntaxHighlighter_test.cpp
+++ b/utils/SyntaxHighlighter_test.cpp
@ -0,0 +1,76 @@
+#include "SyntaxHighlighter.h"
+#include "../components/TextArea.h"
+
+// Interactive manual test: highlights this test's own source file and shows
+// it in a scrollable TextArea. Arrow keys scroll, 'q' quits.
+int main() {
+    SyntaxHighlighter highlighter = SyntaxHighlighter("cpp");
+    TextArea textArea = TextArea(1, 1, 100, 28);
+    std::string rawText;
+    // read in SyntaxHighlighter_test.cpp
+    std::ifstream file("SyntaxHighlighter_test.cpp");
+    std::string line;
+    while (std::getline(file, line)) {
+        rawText += line + "\n";
+    }
+    file.close();
+
+    RichText richText = highlighter.highlight(rawText);
+
+    // // Create the back buffer
+    // HANDLE hBackBuffer = CreateConsoleScreenBuffer(GENERIC_READ | GENERIC_WRITE, 0, NULL, CONSOLE_TEXTMODE_BUFFER, NULL);
+    HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
+
+    // // Clear the back buffer
+    // CONSOLE_SCREEN_BUFFER_INFO csbi;
+    // GetConsoleScreenBufferInfo(hConsole, &csbi);
+    // DWORD dwBytesWritten;
+    // FillConsoleOutputCharacter(hBackBuffer, ' ', csbi.dwSize.X * csbi.dwSize.Y, {0, 0}, &dwBytesWritten);
+    // FillConsoleOutputAttribute(hBackBuffer, csbi.wAttributes, csbi.dwSize.X * csbi.dwSize.Y, {0, 0}, &dwBytesWritten);
+    // Hide the console cursor while the text area is displayed.
+    CONSOLE_CURSOR_INFO cci;
+    cci.bVisible = false;
+    cci.dwSize = 1;
+    //SetConsoleCursorInfo(hBackBuffer, &cci);
+    SetConsoleCursorInfo(hConsole, &cci);
+
+    textArea.setTitle(RichText("SyntaxHighlighter Test", COLOR_LIGHTRED));
+    textArea.setText(richText);
+    textArea.draw();
+
+    while(true) {
+        if (_kbhit()) {
+            // SetConsoleActiveScreenBuffer(hBackBuffer);
+            int opt = _getch();
+
+            if (opt == 0 || opt == 0xE0) {
+                // Arrow keys arrive as a prefix byte (0 or 0xE0) followed by
+                // a scan code. Reading the scan code only after a prefix
+                // keeps the plain letters 'H', 'P', 'K' and 'M' (72, 80, 75,
+                // 77) from being mistaken for arrow keys.
+                switch(_getch()) {
+                    case 72: // up arrow
+                        textArea.moveUp();
+                        break;
+                    case 80: // down arrow
+                        textArea.moveDown();
+                        break;
+                    case 75: // left arrow
+                        textArea.moveLeft();
+                        break;
+                    case 77: // right arrow
+                        textArea.moveRight();
+                        break;
+                }
+            } else if (opt == 'q') {
+                // SetConsoleActiveScreenBuffer(hConsole);
+                // CloseHandle(hBackBuffer);
+                return 0;
+            }
+
+            // // Clear the back buffer
+            // FillConsoleOutputCharacter(hBackBuffer, ' ', csbi.dwSize.X * csbi.dwSize.Y, {0, 0}, &dwBytesWritten);
+            // FillConsoleOutputAttribute(hBackBuffer, csbi.wAttributes, csbi.dwSize.X * csbi.dwSize.Y, {0, 0}, &dwBytesWritten);
+
+            // // Draw into the back buffer
+            textArea.draw();
+
+            // // Switch buffers to show what was drawn
+            // SetConsoleActiveScreenBuffer(hConsole);
+        }
+        // Yield briefly so the polling loop does not spin a full CPU core.
+        Sleep(1);
+    }
+
+    return 0;
+}