实现了SyntaxHighlighter

This commit is contained in:
梦凌汐 2024-12-14 23:38:20 +08:00
parent fafb31ac7f
commit 22617d26ce
7 changed files with 237 additions and 48 deletions

View File

@ -53,11 +53,9 @@ public:
// printf("Add: %s\n", part.text.c_str());
}
}
if(line.length() > 0) {
lines_.push_back(line);
if(maxLineWidth_ < line.length()) {
maxLineWidth_ = line.length();
}
lines_.push_back(line);
if(maxLineWidth_ < line.length()) {
maxLineWidth_ = line.length();
}
}

View File

@ -28,6 +28,7 @@ if
inline
int
long
include
mutable
namespace
new
@ -60,6 +61,8 @@ virtual
void
volatile
wchar_t
while
for
//some useful keywords
std

View File

@ -0,0 +1,6 @@
TOKEN_TYPE_COMMENT:2
TOKEN_TYPE_RESERVE_WORD:9
TOKEN_TYPE_STRING:10
TOKEN_TYPE_NUMBER:11
TOKEN_TYPE_IDENTIFIER:7
TOKEN_TYPE_OPERATOR_OR_DELIMITER:13

View File

@ -24,21 +24,21 @@ else j=b-a;\n\
MyVector<Token> tokens = lexicalAnalysis.tokenize();
std::cout << "Tokenized text: " << std::endl;
for (int i = 0; i < tokens.size(); i++) {
if(tokens[i].type == TokenType::TOKEN_TYPE_IDENTIFIER) {
if(tokens[i].type == CodeTokenType::TOKEN_TYPE_IDENTIFIER) {
std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Identifier: ";
} else if(tokens[i].type == TokenType::TOKEN_TYPE_NUMBER) {
} else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_NUMBER) {
std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Number: ";
} else if(tokens[i].type == TokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER) {
} else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER) {
std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Operator or Delimiter: ";
} else if(tokens[i].type == TokenType::TOKEN_TYPE_STRING) {
} else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_STRING) {
std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "String: ";
} else if(tokens[i].type == TokenType::TOKEN_TYPE_RESERVE_WORD) {
} else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_RESERVE_WORD) {
std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Reserve word: ";
} else if(tokens[i].type == TokenType::TOKEN_TYPE_EOF) {
} else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_EOF) {
std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "End of file.";
} else if(tokens[i].type == TokenType::TOKEN_TYPE_COMMENT) {
} else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_COMMENT) {
std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Comment: ";
} else if(tokens[i].type == TokenType::TOKEN_TYPE_NEWLINE) {
} else if(tokens[i].type == CodeTokenType::TOKEN_TYPE_NEWLINE) {
std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Newline ";
} else {
std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Unknown token: ";

View File

@ -6,7 +6,7 @@
#include <fstream>
#include "../mystl/my_vector.h"
enum class TokenType {
enum class CodeTokenType {
TOKEN_TYPE_IDENTIFIER = 9999,
TOKEN_TYPE_NUMBER = 10000,
TOKEN_TYPE_STRING = 10001,
@ -15,15 +15,70 @@ enum class TokenType {
TOKEN_TYPE_EOF,
TOKEN_TYPE_UNDEFINED,
TOKEN_TYPE_COMMENT,
TOKEN_TYPE_NEWLINE
TOKEN_TYPE_NEWLINE,
TOKEN_TYPE_SPACE
};
/// Returns the enumerator name of `type` as text, e.g. "TOKEN_TYPE_NUMBER".
///
/// Counterpart of stringToCodeTokenType(). A value that is not one of the
/// named enumerators yields "TOKEN_TYPE_UNDEFINED".
///
/// Marked `inline` because this definition is pulled into other files via
/// #include; a non-inline definition would be emitted once per translation
/// unit and break the one-definition rule at link time.
inline std::string CodeTokenTypeToString(CodeTokenType type) {
    switch (type) {
        case CodeTokenType::TOKEN_TYPE_IDENTIFIER:
            return "TOKEN_TYPE_IDENTIFIER";
        case CodeTokenType::TOKEN_TYPE_NUMBER:
            return "TOKEN_TYPE_NUMBER";
        case CodeTokenType::TOKEN_TYPE_STRING:
            return "TOKEN_TYPE_STRING";
        case CodeTokenType::TOKEN_TYPE_RESERVE_WORD:
            return "TOKEN_TYPE_RESERVE_WORD";
        case CodeTokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER:
            return "TOKEN_TYPE_OPERATOR_OR_DELIMITER";
        case CodeTokenType::TOKEN_TYPE_EOF:
            return "TOKEN_TYPE_EOF";
        case CodeTokenType::TOKEN_TYPE_UNDEFINED:
            return "TOKEN_TYPE_UNDEFINED";
        case CodeTokenType::TOKEN_TYPE_COMMENT:
            return "TOKEN_TYPE_COMMENT";
        case CodeTokenType::TOKEN_TYPE_NEWLINE:
            return "TOKEN_TYPE_NEWLINE";
        case CodeTokenType::TOKEN_TYPE_SPACE:
            return "TOKEN_TYPE_SPACE";
        default:
            // Out-of-range values (e.g. produced by a cast) are reported as undefined.
            return "TOKEN_TYPE_UNDEFINED";
    }
}
/// Parses an enumerator name such as "TOKEN_TYPE_NUMBER" into its CodeTokenType.
///
/// The comparison is exact (case-sensitive, no trimming). Any text that is not
/// one of the known names yields CodeTokenType::TOKEN_TYPE_UNDEFINED.
///
/// Marked `inline` because this definition is pulled into other files via
/// #include; the argument is taken by const reference so callers do not pay
/// for a string copy on every lookup.
inline CodeTokenType stringToCodeTokenType(const std::string& str) {
    struct NamedType {
        const char* name;
        CodeTokenType type;
    };
    // One row per enumerator; names match what CodeTokenTypeToString() produces.
    static const NamedType kNamedTypes[] = {
        {"TOKEN_TYPE_IDENTIFIER", CodeTokenType::TOKEN_TYPE_IDENTIFIER},
        {"TOKEN_TYPE_NUMBER", CodeTokenType::TOKEN_TYPE_NUMBER},
        {"TOKEN_TYPE_STRING", CodeTokenType::TOKEN_TYPE_STRING},
        {"TOKEN_TYPE_RESERVE_WORD", CodeTokenType::TOKEN_TYPE_RESERVE_WORD},
        {"TOKEN_TYPE_OPERATOR_OR_DELIMITER", CodeTokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER},
        {"TOKEN_TYPE_EOF", CodeTokenType::TOKEN_TYPE_EOF},
        {"TOKEN_TYPE_UNDEFINED", CodeTokenType::TOKEN_TYPE_UNDEFINED},
        {"TOKEN_TYPE_COMMENT", CodeTokenType::TOKEN_TYPE_COMMENT},
        {"TOKEN_TYPE_NEWLINE", CodeTokenType::TOKEN_TYPE_NEWLINE},
        {"TOKEN_TYPE_SPACE", CodeTokenType::TOKEN_TYPE_SPACE},
    };
    for (const NamedType& entry : kNamedTypes) {
        if (str == entry.name) {
            return entry.type;
        }
    }
    return CodeTokenType::TOKEN_TYPE_UNDEFINED;
}
struct Token {
std::string value;
TokenType type;
CodeTokenType type;
Token() {}
Token(std::string value, TokenType type) {
Token(std::string value, CodeTokenType type) {
this->value = value;
this->type = type;
}
@ -120,8 +175,12 @@ private:
void Scan(int & currentIndex) {
currentToken = "";
while(preprocessedText[currentIndex] == ' ') {
currentToken += preprocessedText[currentIndex++];
if(preprocessedText[currentIndex] == ' ' || preprocessedText[currentIndex] == '\n' || preprocessedText[currentIndex] == '\t') {
while(preprocessedText[currentIndex] == ' ' || preprocessedText[currentIndex] == '\n' || preprocessedText[currentIndex] == '\t') {
currentToken += preprocessedText[currentIndex++];
}
syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_SPACE);
return;
}
// printf("current letter: [%d]%c\n", currentIndex, preprocessedText[currentIndex]);
@ -132,7 +191,7 @@ private:
}
syn = searchReserveWord(currentToken);
syn = syn == -1 ? static_cast<int>(TokenType::TOKEN_TYPE_IDENTIFIER) : syn;
syn = syn == -1 ? static_cast<int>(CodeTokenType::TOKEN_TYPE_IDENTIFIER) : syn;
return;
} else if(isDigit(preprocessedText[currentIndex])) {
@ -140,7 +199,7 @@ private:
currentToken += preprocessedText[currentIndex++];
}
syn = static_cast<int>(TokenType::TOKEN_TYPE_NUMBER);
syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_NUMBER);
return;
} else if((isOperatorOrDelimiter(std::string(1, preprocessedText[currentIndex])) != -1) &&
@ -151,7 +210,7 @@ private:
currentToken += preprocessedText[currentIndex++];
}
currentToken += preprocessedText[currentIndex++];
syn = static_cast<int>(TokenType::TOKEN_TYPE_STRING);
syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_STRING);
return;
}
if(preprocessedText[currentIndex] == '\'') {
@ -160,7 +219,7 @@ private:
currentToken += preprocessedText[currentIndex++];
}
currentToken += preprocessedText[currentIndex++];
syn = static_cast<int>(TokenType::TOKEN_TYPE_STRING);
syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_STRING);
return;
}
currentToken += preprocessedText[currentIndex++];
@ -182,7 +241,7 @@ private:
currentToken += preprocessedText[currentIndex];
currentToken += preprocessedText[currentIndex + 1];
if(currentToken == "//") {
syn = static_cast<int>(TokenType::TOKEN_TYPE_COMMENT);
syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_COMMENT);
currentIndex += 2;
while(preprocessedText[currentIndex] != '\n' && currentIndex < preprocessedText.size()) {
currentToken += preprocessedText[currentIndex++];
@ -190,7 +249,7 @@ private:
return;
}
if(currentToken == "/*") {
syn = static_cast<int>(TokenType::TOKEN_TYPE_COMMENT);
syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_COMMENT);
currentIndex += 2;
while(currentIndex < rawText.size() - 1 && !(preprocessedText[currentIndex] == '*' && preprocessedText[currentIndex + 1] == '/')) {
currentToken += preprocessedText[currentIndex++];
@ -213,16 +272,16 @@ private:
}
return;
} else if (preprocessedText[currentIndex] == '\0' || currentIndex >= preprocessedText.size()) {
syn = static_cast<int>(TokenType::TOKEN_TYPE_EOF);
syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_EOF);
currentIndex++;
return;
} else if(preprocessedText[currentIndex] == '\n') {
syn = static_cast<int>(TokenType::TOKEN_TYPE_NEWLINE);
syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_NEWLINE);
currentToken = "\n";
currentIndex++;
return;
} else {
syn = static_cast<int>(TokenType::TOKEN_TYPE_UNDEFINED);
syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_UNDEFINED);
currentIndex++;
return;
}
@ -263,39 +322,39 @@ public:
syn = -1;
int currentIndex = 0;
tokens.clear();
while(syn != static_cast<int>(TokenType::TOKEN_TYPE_EOF) && syn != static_cast<int>(TokenType::TOKEN_TYPE_UNDEFINED)) {
while(syn != static_cast<int>(CodeTokenType::TOKEN_TYPE_EOF) && syn != static_cast<int>(CodeTokenType::TOKEN_TYPE_UNDEFINED)) {
Scan(currentIndex);
printf("currentToken: [%s]\n", currentToken.c_str());
if(syn == static_cast<int>(TokenType::TOKEN_TYPE_STRING)) {
tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_STRING));
// printf("currentToken: [%s]\n", currentToken.c_str());
if(syn == static_cast<int>(CodeTokenType::TOKEN_TYPE_STRING)) {
tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_STRING));
// printf("string: %s\n", currentToken.c_str());
} else if(syn == static_cast<int>(TokenType::TOKEN_TYPE_IDENTIFIER)) {
tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_IDENTIFIER));
} else if(syn == static_cast<int>(CodeTokenType::TOKEN_TYPE_IDENTIFIER)) {
tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_IDENTIFIER));
// printf("identifier: %s\n", currentToken.c_str());
} else if(syn == static_cast<int>(TokenType::TOKEN_TYPE_NUMBER)) {
tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_NUMBER));
} else if(syn == static_cast<int>(CodeTokenType::TOKEN_TYPE_NUMBER)) {
tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_NUMBER));
// printf("number: %s\n", currentToken.c_str());
} else if(syn > 0 && syn < reserveWordCount) {
tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_RESERVE_WORD));
tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_RESERVE_WORD));
// printf("reserve word: %s\n", currentToken.c_str());
} else if(syn >= reserveWordCount && syn < reserveWordCount + operatorAndDelimiterCount) {
tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER));
tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_OPERATOR_OR_DELIMITER));
// printf("operator or delimiter: %s\n", currentToken.c_str());
} else if(syn == static_cast<int>(TokenType::TOKEN_TYPE_COMMENT)) {
tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_COMMENT));
} else if(syn == static_cast<int>(CodeTokenType::TOKEN_TYPE_COMMENT)) {
tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_COMMENT));
// printf("comment: %s\n", currentToken.c_str());
} else if(syn == static_cast<int>(TokenType::TOKEN_TYPE_EOF)) {
tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_EOF));
} else if(syn == static_cast<int>(CodeTokenType::TOKEN_TYPE_EOF)) {
tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_EOF));
// printf("EOF: %s\n", currentToken.c_str());
} else if(syn == static_cast<int>(TokenType::TOKEN_TYPE_NEWLINE)) {
tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_NEWLINE));
} else if(syn == static_cast<int>(CodeTokenType::TOKEN_TYPE_NEWLINE)) {
tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_NEWLINE));
// printf("newline: %s\n", currentToken.c_str());
} else {
tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_UNDEFINED));
tokens.push_back(Token(currentToken, CodeTokenType::TOKEN_TYPE_UNDEFINED));
// printf("undefined: %s\n", currentToken.c_str());
}
if(currentIndex >= preprocessedText.length()) {
syn = static_cast<int>(TokenType::TOKEN_TYPE_EOF);
syn = static_cast<int>(CodeTokenType::TOKEN_TYPE_EOF);
}
}
return tokens;

View File

@ -4,12 +4,59 @@
#include <fstream>
#include <stdexcept>
#include <string>

#include "Color.h"
#include "RichText.h"
#include "../mystl/my_vector.h"
#include "LexicalAnalysis.h"
// Pairs one lexer token category with the color applied to tokens of that
// category. Member order matters: rules are built as HighlightRule{type, color}.
struct HighlightRule {
// Token category this rule applies to.
CodeTokenType type;
// Color given to the text of tokens in that category.
MColor color;
};
/// Colors source text by lexing it and applying per-token-type color rules.
///
/// Rules are read once, at construction, from
/// "../data/highlight/<ruleName>/rules.txt". Each line has the form
/// `TOKEN_TYPE_NAME:color`, where `color` is an integer that is cast to MColor.
class SyntaxHighlighter {
private:
    // Name of the rule set; also handed to LexicalAnalysis in highlight().
    std::string ruleName;
    // Highlight rules in the order they appear in the rules file.
    MyVector<HighlightRule> rules;

public:
    /// Loads the rule set called `ruleName`.
    ///
    /// If the rules file cannot be opened the rule list stays empty, so
    /// highlight() renders every token in COLOR_WHITE. Lines without a ':'
    /// separator, or whose color field is not a representable integer (this
    /// includes blank lines), are skipped instead of letting std::stoi's
    /// exception escape the constructor.
    SyntaxHighlighter(std::string ruleName) {
        this->ruleName = ruleName;
        std::ifstream file("../data/highlight/" + ruleName + "/rules.txt"); // Just for unit test
        // std::ifstream file("./data/highlight/" + ruleName + "/keywords.txt");

        // Reading from a stream that failed to open simply yields no lines.
        std::string line;
        while (std::getline(file, line)) {
            const std::string::size_type separator = line.find(':');
            if (separator == std::string::npos) {
                continue;  // not a "name:color" line
            }

            int colorCode = 0;
            try {
                colorCode = std::stoi(line.substr(separator + 1));
            } catch (const std::logic_error&) {
                // std::invalid_argument and std::out_of_range both derive
                // from std::logic_error: the color field is unusable.
                continue;
            }

            CodeTokenType type = stringToCodeTokenType(line.substr(0, separator));
            MColor color = static_cast<MColor>(colorCode);
            rules.push_back(HighlightRule{type, color});
        }
    }

    /// Tokenizes `text` and returns it as RichText, token by token.
    ///
    /// Each token takes the color of the first rule whose type matches the
    /// token's type; tokens with no matching rule are COLOR_WHITE. Every
    /// token's text is appended exactly once, even if the rules file lists
    /// the same token type more than once.
    RichText highlight(std::string text) {
        LexicalAnalysis lexicalAnalysis(this->ruleName);
        lexicalAnalysis.setRawText(text);
        MyVector<Token> tokens = lexicalAnalysis.tokenize();

        RichText richText;
        for (int i = 0; i < tokens.size(); i++) {
            const Token& token = tokens[i];
            bool hasMatched = false;
            for (int j = 0; j < rules.size(); j++) {
                const HighlightRule& rule = rules[j];
                if (token.type == rule.type) {
                    hasMatched = true;
                    richText += RichText(token.value, rule.color);
                    break;  // first matching rule wins; avoids duplicating the text
                }
            }
            if (!hasMatched) {
                richText += RichText(token.value, COLOR_WHITE);
            }
        }
        return richText;
    }
};
#endif // SYNTAX_HIGHLIGHTER_H

View File

@ -0,0 +1,76 @@
#include "SyntaxHighlighter.h"
#include "../components/TextArea.h"
/// Manual test driver for SyntaxHighlighter.
///
/// Reads "SyntaxHighlighter_test.cpp" from the working directory, highlights
/// it with the "cpp" rule set and shows the result in a TextArea. The arrow
/// keys scroll the view and 'q' quits.
int main() {
    SyntaxHighlighter highlighter("cpp");
    TextArea textArea(1, 1, 100, 28);

    // Load the file to display; a missing file just results in empty text.
    std::string rawText;
    {
        std::ifstream file("SyntaxHighlighter_test.cpp");
        std::string line;
        while (std::getline(file, line)) {
            rawText += line;
            rawText += "\n";
        }
    }
    RichText richText = highlighter.highlight(rawText);

    // Hide the console cursor while the text area is on screen.
    // Double buffering through CreateConsoleScreenBuffer is currently disabled;
    // drawing goes straight to the standard output buffer.
    HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
    CONSOLE_CURSOR_INFO cci;
    cci.bVisible = false;
    cci.dwSize = 1;
    SetConsoleCursorInfo(hConsole, &cci);

    textArea.setTitle(RichText("SyntaxHighlighter Test", COLOR_LIGHTRED));
    textArea.setText(richText);
    textArea.draw();

    // _getch() reports arrow keys as two values: a prefix (0 or 0xE0) followed
    // by the scan code below. Checking the prefix keeps the plain letters
    // 'H', 'P', 'K' and 'M' (which share those codes) from scrolling the view.
    const int kExtendedPrefixLegacy = 0;
    const int kExtendedPrefix = 0xE0;
    const int kArrowUp = 72;
    const int kArrowDown = 80;
    const int kArrowLeft = 75;
    const int kArrowRight = 77;

    while (true) {
        if (_kbhit()) {
            // Keep the value as int: 0xE0 does not fit a signed char.
            const int key = _getch();
            bool viewMoved = false;

            if (key == kExtendedPrefixLegacy || key == kExtendedPrefix) {
                switch (_getch()) {
                    case kArrowUp:
                        textArea.moveUp();
                        viewMoved = true;
                        break;
                    case kArrowDown:
                        textArea.moveDown();
                        viewMoved = true;
                        break;
                    case kArrowLeft:
                        textArea.moveLeft();
                        viewMoved = true;
                        break;
                    case kArrowRight:
                        textArea.moveRight();
                        viewMoved = true;
                        break;
                    default:
                        break;  // other extended keys are ignored
                }
            } else if (key == 'q') {
                return 0;
            }

            // Redraw only when the view actually changed.
            if (viewMoved) {
                textArea.draw();
            }
        }
        Sleep(1);  // yield briefly so the polling loop does not spin a core
    }
}