diff --git a/components/Text.h b/components/Text.h index 859e868..8808e01 100644 --- a/components/Text.h +++ b/components/Text.h @@ -53,11 +53,9 @@ public: // printf("Add: %s\n", part.text.c_str()); } } - if(line.length() > 0) { - lines_.push_back(line); - if(maxLineWidth_ < line.length()) { - maxLineWidth_ = line.length(); - } + lines_.push_back(line); + if(maxLineWidth_ < line.length()) { + maxLineWidth_ = line.length(); } } diff --git a/data/highlight/cpp/keywords.txt b/data/highlight/cpp/keywords.txt index 8294cbe..30155ac 100644 --- a/data/highlight/cpp/keywords.txt +++ b/data/highlight/cpp/keywords.txt @@ -28,6 +28,7 @@ if inline int long +include mutable namespace new @@ -60,6 +61,8 @@ virtual void volatile wchar_t +while +for //some useful keywords std diff --git a/data/highlight/cpp/rules.txt b/data/highlight/cpp/rules.txt new file mode 100644 index 0000000..4d7a8e6 --- /dev/null +++ b/data/highlight/cpp/rules.txt @@ -0,0 +1,6 @@ +TOKEN_TYPE_COMMENT:2 +TOKEN_TYPE_RESERVE_WORD:9 +TOKEN_TYPE_STRING:10 +TOKEN_TYPE_NUMBER:11 +TOKEN_TYPE_IDENTIFIER:7 +TOKEN_TYPE_OPERATOR_OR_DELIMITER:13 \ No newline at end of file diff --git a/utils/LexicalAnalysis.cpp b/utils/LexicalAnalysis.cpp index 8858739..ba60cb3 100644 --- a/utils/LexicalAnalysis.cpp +++ b/utils/LexicalAnalysis.cpp @@ -24,21 +24,21 @@ else j=b-a;\n\ MyVector tokens = lexicalAnalysis.tokenize(); std::cout << "Tokenized text: " << std::endl; for (int i = 0; i < tokens.size(); i++) { - if(tokens[i].type == TokenType::TOKEN_TYPE_IDENTIFIER) { + if(tokens[i].type == CodeTokenType::TOKEN_TYPE_IDENTIFIER) { std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Identifier: "; - } else if(tokens[i].type == TokenType::TOKEN_TYPE_NUMBER) { + } else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_NUMBER) { std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Number: "; - } else if(tokens[i].type == TokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER) { + } else 
if(tokens[i].type == CodeTokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER) { std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Operator or Delimiter: "; - } else if(tokens[i].type == TokenType::TOKEN_TYPE_STRING) { + } else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_STRING) { std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "String: "; - } else if(tokens[i].type == TokenType::TOKEN_TYPE_RESERVE_WORD) { + } else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_RESERVE_WORD) { std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Reserve word: "; - } else if(tokens[i].type == TokenType::TOKEN_TYPE_EOF) { + } else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_EOF) { std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "End of file."; - } else if(tokens[i].type == TokenType::TOKEN_TYPE_COMMENT) { + } else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_COMMENT) { std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Comment: "; - } else if(tokens[i].type == TokenType::TOKEN_TYPE_NEWLINE) { + } else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_NEWLINE) { std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Newline "; } else { std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Unknown token: "; diff --git a/utils/LexicalAnalysis.h b/utils/LexicalAnalysis.h index 4359d64..d1cf706 100644 --- a/utils/LexicalAnalysis.h +++ b/utils/LexicalAnalysis.h @@ -6,7 +6,7 @@ #include #include "../mystl/my_vector.h" -enum class TokenType { +enum class CodeTokenType { TOKEN_TYPE_IDENTIFIER = 9999, TOKEN_TYPE_NUMBER = 10000, TOKEN_TYPE_STRING = 10001, @@ -15,15 +15,70 @@ enum class TokenType { TOKEN_TYPE_EOF, TOKEN_TYPE_UNDEFINED, TOKEN_TYPE_COMMENT, - TOKEN_TYPE_NEWLINE + TOKEN_TYPE_NEWLINE, + TOKEN_TYPE_SPACE }; +//用于将字符串和CodeTokenType互转的函数 +std::string CodeTokenTypeToString(CodeTokenType type) { 
+ switch(type) { + case CodeTokenType::TOKEN_TYPE_IDENTIFIER: + return "TOKEN_TYPE_IDENTIFIER"; + case CodeTokenType::TOKEN_TYPE_NUMBER: + return "TOKEN_TYPE_NUMBER"; + case CodeTokenType::TOKEN_TYPE_STRING: + return "TOKEN_TYPE_STRING"; + case CodeTokenType::TOKEN_TYPE_RESERVE_WORD: + return "TOKEN_TYPE_RESERVE_WORD"; + case CodeTokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER: + return "TOKEN_TYPE_OPERATOR_OR_DELIMITER"; + case CodeTokenType::TOKEN_TYPE_EOF: + return "TOKEN_TYPE_EOF"; + case CodeTokenType::TOKEN_TYPE_UNDEFINED: + return "TOKEN_TYPE_UNDEFINED"; + case CodeTokenType::TOKEN_TYPE_COMMENT: + return "TOKEN_TYPE_COMMENT"; + case CodeTokenType::TOKEN_TYPE_NEWLINE: + return "TOKEN_TYPE_NEWLINE"; + case CodeTokenType::TOKEN_TYPE_SPACE: + return "TOKEN_TYPE_SPACE"; + default: + return "TOKEN_TYPE_UNDEFINED"; + } +} + +CodeTokenType stringToCodeTokenType(std::string str) { + if(str == "TOKEN_TYPE_IDENTIFIER") { + return CodeTokenType::TOKEN_TYPE_IDENTIFIER; + } else if(str == "TOKEN_TYPE_NUMBER") { + return CodeTokenType::TOKEN_TYPE_NUMBER; + } else if(str == "TOKEN_TYPE_STRING") { + return CodeTokenType::TOKEN_TYPE_STRING; + } else if(str == "TOKEN_TYPE_RESERVE_WORD") { + return CodeTokenType::TOKEN_TYPE_RESERVE_WORD; + } else if(str == "TOKEN_TYPE_OPERATOR_OR_DELIMITER") { + return CodeTokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER; + } else if(str == "TOKEN_TYPE_EOF") { + return CodeTokenType::TOKEN_TYPE_EOF; + } else if(str == "TOKEN_TYPE_UNDEFINED") { + return CodeTokenType::TOKEN_TYPE_UNDEFINED; + } else if(str == "TOKEN_TYPE_COMMENT") { + return CodeTokenType::TOKEN_TYPE_COMMENT; + } else if(str == "TOKEN_TYPE_NEWLINE") { + return CodeTokenType::TOKEN_TYPE_NEWLINE; + } else if(str == "TOKEN_TYPE_SPACE") { + return CodeTokenType::TOKEN_TYPE_SPACE; + } else { + return CodeTokenType::TOKEN_TYPE_UNDEFINED; + } +} + struct Token { std::string value; - TokenType type; + CodeTokenType type; Token() {} - Token(std::string value, TokenType type) { + Token(std::string 
value, CodeTokenType type) { this->value = value; this->type = type; } @@ -120,10 +175,14 @@ private: void Scan(int & currentIndex) { currentToken = ""; - while(preprocessedText[currentIndex] == ' ') { - currentToken += preprocessedText[currentIndex++]; + if(preprocessedText[currentIndex] == ' ' || preprocessedText[currentIndex] == '\n' || preprocessedText[currentIndex] == '\t') { + while(preprocessedText[currentIndex] == ' ' || preprocessedText[currentIndex] == '\n' || preprocessedText[currentIndex] == '\t') { + currentToken += preprocessedText[currentIndex++]; + } + syn = static_cast(CodeTokenType::TOKEN_TYPE_SPACE); + return; } - + // printf("current letter: [%d]%c\n", currentIndex, preprocessedText[currentIndex]); if(isLetter(preprocessedText[currentIndex])) { @@ -132,7 +191,7 @@ private: } syn = searchReserveWord(currentToken); - syn = syn == -1 ? static_cast(TokenType::TOKEN_TYPE_IDENTIFIER) : syn; + syn = syn == -1 ? static_cast(CodeTokenType::TOKEN_TYPE_IDENTIFIER) : syn; return; } else if(isDigit(preprocessedText[currentIndex])) { @@ -140,7 +199,7 @@ private: currentToken += preprocessedText[currentIndex++]; } - syn = static_cast(TokenType::TOKEN_TYPE_NUMBER); + syn = static_cast(CodeTokenType::TOKEN_TYPE_NUMBER); return; } else if((isOperatorOrDelimiter(std::string(1, preprocessedText[currentIndex])) != -1) && @@ -151,7 +210,7 @@ private: currentToken += preprocessedText[currentIndex++]; } currentToken += preprocessedText[currentIndex++]; - syn = static_cast(TokenType::TOKEN_TYPE_STRING); + syn = static_cast(CodeTokenType::TOKEN_TYPE_STRING); return; } if(preprocessedText[currentIndex] == '\'') { @@ -160,7 +219,7 @@ private: currentToken += preprocessedText[currentIndex++]; } currentToken += preprocessedText[currentIndex++]; - syn = static_cast(TokenType::TOKEN_TYPE_STRING); + syn = static_cast(CodeTokenType::TOKEN_TYPE_STRING); return; } currentToken += preprocessedText[currentIndex++]; @@ -182,7 +241,7 @@ private: currentToken += 
preprocessedText[currentIndex]; currentToken += preprocessedText[currentIndex + 1]; if(currentToken == "//") { - syn = static_cast(TokenType::TOKEN_TYPE_COMMENT); + syn = static_cast(CodeTokenType::TOKEN_TYPE_COMMENT); currentIndex += 2; while(preprocessedText[currentIndex] != '\n' && currentIndex < preprocessedText.size()) { currentToken += preprocessedText[currentIndex++]; @@ -190,7 +249,7 @@ private: return; } if(currentToken == "/*") { - syn = static_cast(TokenType::TOKEN_TYPE_COMMENT); + syn = static_cast(CodeTokenType::TOKEN_TYPE_COMMENT); currentIndex += 2; while(currentIndex < rawText.size() - 1 && !(preprocessedText[currentIndex] == '*' && preprocessedText[currentIndex + 1] == '/')) { currentToken += preprocessedText[currentIndex++]; @@ -213,16 +272,16 @@ private: } return; } else if (preprocessedText[currentIndex] == '\0' || currentIndex >= preprocessedText.size()) { - syn = static_cast(TokenType::TOKEN_TYPE_EOF); + syn = static_cast(CodeTokenType::TOKEN_TYPE_EOF); currentIndex++; return; } else if(preprocessedText[currentIndex] == '\n') { - syn = static_cast(TokenType::TOKEN_TYPE_NEWLINE); + syn = static_cast(CodeTokenType::TOKEN_TYPE_NEWLINE); currentToken = "\n"; currentIndex++; return; } else { - syn = static_cast(TokenType::TOKEN_TYPE_UNDEFINED); + syn = static_cast(CodeTokenType::TOKEN_TYPE_UNDEFINED); currentIndex++; return; } @@ -263,39 +322,39 @@ public: syn = -1; int currentIndex = 0; tokens.clear(); - while(syn != static_cast(TokenType::TOKEN_TYPE_EOF) && syn != static_cast(TokenType::TOKEN_TYPE_UNDEFINED)) { + while(syn != static_cast(CodeTokenType::TOKEN_TYPE_EOF) && syn != static_cast(CodeTokenType::TOKEN_TYPE_UNDEFINED)) { Scan(currentIndex); - printf("currentToken: [%s]\n", currentToken.c_str()); - if(syn == static_cast(TokenType::TOKEN_TYPE_STRING)) { - tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_STRING)); + // printf("currentToken: [%s]\n", currentToken.c_str()); + if(syn == static_cast(CodeTokenType::TOKEN_TYPE_STRING)) { 
+ tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_STRING)); // printf("string: %s\n", currentToken.c_str()); - } else if(syn == static_cast(TokenType::TOKEN_TYPE_IDENTIFIER)) { - tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_IDENTIFIER)); + } else if(syn == static_cast(CodeTokenType::TOKEN_TYPE_IDENTIFIER)) { + tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_IDENTIFIER)); // printf("identifier: %s\n", currentToken.c_str()); - } else if(syn == static_cast(TokenType::TOKEN_TYPE_NUMBER)) { - tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_NUMBER)); + } else if(syn == static_cast(CodeTokenType::TOKEN_TYPE_NUMBER)) { + tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_NUMBER)); // printf("number: %s\n", currentToken.c_str()); } else if(syn > 0 && syn < reserveWordCount) { - tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_RESERVE_WORD)); + tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_RESERVE_WORD)); // printf("reserve word: %s\n", currentToken.c_str()); } else if(syn >= reserveWordCount && syn < reserveWordCount + operatorAndDelimiterCount) { - tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER)); + tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER)); // printf("operator or delimiter: %s\n", currentToken.c_str()); - } else if(syn == static_cast(TokenType::TOKEN_TYPE_COMMENT)) { - tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_COMMENT)); + } else if(syn == static_cast(CodeTokenType::TOKEN_TYPE_COMMENT)) { + tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_COMMENT)); // printf("comment: %s\n", currentToken.c_str()); - } else if(syn == static_cast(TokenType::TOKEN_TYPE_EOF)) { - tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_EOF)); + } else if(syn == static_cast(CodeTokenType::TOKEN_TYPE_EOF)) { + tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_EOF)); // 
printf("EOF: %s\n", currentToken.c_str()); - } else if(syn == static_cast(TokenType::TOKEN_TYPE_NEWLINE)) { - tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_NEWLINE)); + } else if(syn == static_cast(CodeTokenType::TOKEN_TYPE_NEWLINE)) { + tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_NEWLINE)); // printf("newline: %s\n", currentToken.c_str()); } else { - tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_UNDEFINED)); + tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_UNDEFINED)); // printf("undefined: %s\n", currentToken.c_str()); } if(currentIndex >= preprocessedText.length()) { - syn = static_cast(TokenType::TOKEN_TYPE_EOF); + syn = static_cast(CodeTokenType::TOKEN_TYPE_EOF); } } return tokens; diff --git a/utils/SyntaxHighlighter.h b/utils/SyntaxHighlighter.h index cd306a4..dcd591e 100644 --- a/utils/SyntaxHighlighter.h +++ b/utils/SyntaxHighlighter.h @@ -4,12 +4,59 @@ #include "Color.h" #include "RichText.h" #include +#include "../mystl/my_vector.h" +#include +#include "LexicalAnalysis.h" + +struct HighlightRule { + CodeTokenType type; + MColor color; +}; class SyntaxHighlighter { private: std::string ruleName; - + MyVector rules; +public: + SyntaxHighlighter(std::string ruleName) { + this->ruleName = ruleName; + std::ifstream file("../data/highlight/" + ruleName + "/rules.txt"); // Just for unit test + // std::ifstream file("./data/highlight/" + ruleName + "/keywords.txt"); + if (file.is_open()) { + // printf("keywords file open\n"); + std::string line; + while (std::getline(file, line)) { + //every line is like "ruleName(string):color(short)" + CodeTokenType type = stringToCodeTokenType(line.substr(0, line.find(":"))); + MColor color = static_cast(std::stoi(line.substr(line.find(":") + 1))); + rules.push_back(HighlightRule{type, color}); + } + file.close(); + } + } + + RichText highlight(std::string text) { + LexicalAnalysis lexicalAnalysis(this->ruleName); + lexicalAnalysis.setRawText(text); + MyVector 
tokens = lexicalAnalysis.tokenize(); + RichText richText; + for (int i = 0; i < tokens.size(); i++) { + Token token = tokens[i]; + bool hasMatched = false; + for (int j = 0; j < rules.size(); j++) { + HighlightRule rule = rules[j]; + if (token.type == rule.type) { + hasMatched = true; + richText += RichText(token.value, rule.color); + } + } + if (!hasMatched) { + richText += RichText(token.value, COLOR_WHITE); + } + } + return richText; + } }; #endif // SYNTAX_HIGHLIGHTER_H \ No newline at end of file diff --git a/utils/SyntaxHighlighter_test.cpp b/utils/SyntaxHighlighter_test.cpp new file mode 100644 index 0000000..b74bfff --- /dev/null +++ b/utils/SyntaxHighlighter_test.cpp @@ -0,0 +1,76 @@ +#include "SyntaxHighlighter.h" +#include "../components/TextArea.h" + +int main() { + SyntaxHighlighter highlighter = SyntaxHighlighter("cpp"); + TextArea textArea = TextArea(1, 1, 100, 28); + std::string rawText; + // read in SyntaxHighlighter_test.cpp + std::ifstream file("SyntaxHighlighter_test.cpp"); + std::string line; + while (std::getline(file, line)) { + rawText += line + "\n"; + } + file.close(); + + RichText richText = highlighter.highlight(rawText); + + // // Create the back buffer + // HANDLE hBackBuffer = CreateConsoleScreenBuffer(GENERIC_READ | GENERIC_WRITE, 0, NULL, CONSOLE_TEXTMODE_BUFFER, NULL); + HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE); + + // // Clear the back buffer + // CONSOLE_SCREEN_BUFFER_INFO csbi; + // GetConsoleScreenBufferInfo(hConsole, &csbi); + // DWORD dwBytesWritten; + // FillConsoleOutputCharacter(hBackBuffer, ' ', csbi.dwSize.X * csbi.dwSize.Y, {0, 0}, &dwBytesWritten); + // FillConsoleOutputAttribute(hBackBuffer, csbi.wAttributes, csbi.dwSize.X * csbi.dwSize.Y, {0, 0}, &dwBytesWritten); + CONSOLE_CURSOR_INFO cci; + cci.bVisible = false; + cci.dwSize = 1; + //SetConsoleCursorInfo(hBackBuffer, &cci); + SetConsoleCursorInfo(hConsole, &cci); + + textArea.setTitle(RichText("SyntaxHighlighter Test", COLOR_LIGHTRED)); + textArea.setText(richText); + 
textArea.draw(); + + while(true) { + if (_kbhit()) { + // SetConsoleActiveScreenBuffer(hBackBuffer); + char opt = _getch(); + + switch(opt) { + case 72: + textArea.moveUp(); + break; + case 80: + textArea.moveDown(); + break; + case 75: + textArea.moveLeft(); + break; + case 77: + textArea.moveRight(); + break; + case 'q': + // SetConsoleActiveScreenBuffer(hConsole); + // CloseHandle(hBackBuffer); + return 0; + } + + // // Clear the back buffer + // FillConsoleOutputCharacter(hBackBuffer, ' ', csbi.dwSize.X * csbi.dwSize.Y, {0, 0}, &dwBytesWritten); + // FillConsoleOutputAttribute(hBackBuffer, csbi.wAttributes, csbi.dwSize.X * csbi.dwSize.Y, {0, 0}, &dwBytesWritten); + + // // Draw into the back buffer + textArea.draw(); + + // // Switch to the back buffer to display the drawn content + // SetConsoleActiveScreenBuffer(hConsole); + } + Sleep(1); + } + + return 0; +} \ No newline at end of file