1
use crate::diagnostics::diagnostics_api::{Diagnostic, Position, Range, Severity};
2
use crate::parser::traversal::WalkDFS;
3
use crate::parser::util::get_tree;
4
use tree_sitter::Node;
5

            
6
/// Helper function to see all the error nodes tree-sitter generated.
7
/// Prints each error or missing node's.
8
pub fn print_all_error_nodes(source: &str) {
9
    if let Some((tree, _)) = get_tree(source) {
10
        let root_node = tree.root_node();
11
        println!("{}", root_node.to_sexp());
12
        let mut stack = vec![root_node];
13
        while let Some(node) = stack.pop() {
14
            if node.is_error() || node.is_missing() {
15
                println!(
16
                    "[all errors] Error node: '{}' [{}:{}-{}:{}] (children: {})",
17
                    node.kind(),
18
                    node.start_position().row,
19
                    node.start_position().column,
20
                    node.end_position().row,
21
                    node.end_position().column,
22
                    node.child_count()
23
                );
24
            }
25
            for i in (0..node.child_count()).rev() {
26
                if let Some(child) = node.child(i) {
27
                    stack.push(child);
28
                }
29
            }
30
        }
31
    } else {
32
        println!("[all errors] Could not parse source.");
33
    }
34
}
35

            
36
/// Helper function
37
pub fn print_diagnostics(diags: &[Diagnostic]) {
38
    for (i, diag) in diags.iter().enumerate() {
39
        println!(
40
            "Diagnostic {}:\n  Range: ({}:{}) - ({}:{})\n  Severity: {:?}\n  Message: {}\n  Source: {}\n",
41
            i + 1,
42
            diag.range.start.line,
43
            diag.range.start.character,
44
            diag.range.end.line,
45
            diag.range.end.character,
46
            diag.severity,
47
            diag.message,
48
            diag.source
49
        );
50
    }
51
}
52

            
53
/// Detects syntactic issues in the essence source text and returns a vector of Diagnostics.
54
///
55
/// This function traverses the parse tree, looking for missing or error nodes, and generates
56
/// diagnostics for each. It uses a DFS and skips children of error/missing nodes
57
/// to avoid duplicate diagnostics. If the source cannot be parsed, a diagnostic is returned for that.
58
///
59
/// # Arguments
60
/// * `source` - The source code to analyze.
61
///
62
/// # Returns
63
/// * `Vec<Diagnostic>` - A vector of diagnostics describing syntactic issues found in the source.
64
pub fn detect_syntactic_errors(source: &str) -> Vec<Diagnostic> {
65
    let mut diagnostics = Vec::new();
66

            
67
    let (tree, _) = match get_tree(source) {
68
        Some(tree) => tree,
69
        None => {
70
            let last_line = source.lines().count().saturating_sub(1);
71
            let last_char = source.lines().last().map(|l| l.len()).unwrap_or(0);
72
            diagnostics.push(Diagnostic {
73
                range: Range {
74
                    start: Position {
75
                        line: 0,
76
                        character: 0,
77
                    },
78
                    end: Position {
79
                        line: last_line as u32,
80
                        character: last_char as u32,
81
                    },
82
                },
83
                severity: Severity::Error,
84
                message: "Failed to read the source code".to_string(),
85
                source: "Tree-Sitter-Parse-Error",
86
            });
87
            return diagnostics;
88
        }
89
    };
90

            
91
    let root_node = tree.root_node();
92
    // Retract (do not descend) if the node is missing, error, or their parent is missing/error
93
    let retract = |node: &tree_sitter::Node| {
94
        node.is_missing() || node.is_error() || node.start_position() == node.end_position()
95
    };
96

            
97
    for node in WalkDFS::with_retract(&root_node, &retract) {
98
        // Tree-sitter sometimes fails to insert a MISSING node, do a range check to be sure
99
        if node.start_position() == node.end_position() {
100
            diagnostics.push(classify_missing_token(node));
101
            continue;
102
        }
103

            
104
        // Only classify error nodes whose parent is not error/missing
105
        if node.is_error()
106
            && !node
107
                .parent()
108
                .is_some_and(|p| p.is_error() || p.is_missing())
109
        {
110
            diagnostics.push(classify_syntax_error(node, source));
111
            continue;
112
        }
113
    }
114

            
115
    diagnostics
116
}
117

            
118
/// Classifies a syntax error node and returns a diagnostic for it.
119
fn classify_syntax_error(node: Node, source: &str) -> Diagnostic {
120
    let (start, end) = (node.start_position(), node.end_position());
121

            
122
    if node.is_missing() {
123
        return classify_missing_token(node);
124
    }
125

            
126
    let message = if is_unexpected_token(node) {
127
        // If no children (exept the token itself) - unexpected token
128
        classify_unexpected_token_error(node, source)
129
    } else {
130
        classify_general_syntax_error(node)
131
    };
132
    Diagnostic {
133
        range: Range {
134
            start: Position {
135
                line: start.row as u32,
136
                character: start.column as u32,
137
            },
138
            end: Position {
139
                line: end.row as u32,
140
                character: end.column as u32,
141
            },
142
        },
143
        severity: Severity::Error,
144
        message,
145
        source: "syntactic-error-detector",
146
    }
147
}
148

            
149
/// When an unexpected sybmol is part of the grammar - token, CST produces one ERROR node
150
/// If not part of the grammar - two nested ERROR nodes.
151
/// For misplaces integers - one ERROR node with no children, for everything else, one child node
152
/// !is_named() is used to detect string literals
153
fn is_unexpected_token(node: Node) -> bool {
154
    node.child_count() == 0
155
        || node.child_count() == 1
156
            && (!node.child(0).unwrap().is_named() || (node.child(0).unwrap().is_error()))
157
}
158
/// Classifies a missing token node and generates a diagnostic with a context-aware message.
159
fn classify_missing_token(node: Node) -> Diagnostic {
160
    let start = node.start_position();
161
    let end = node.end_position();
162

            
163
    let message = if let Some(parent) = node.parent() {
164
        match parent.kind() {
165
            "letting_statement" => "Missing 'expression or domain'".to_string(),
166
            "and_expr" => "Missing right operand in 'and' expression".to_string(),
167
            "comparison_expr" => "Missing right operand in 'comparison' expression".to_string(),
168
            _ => format!("Missing '{}'", node.kind()),
169
        }
170
    } else {
171
        format!("Missing '{}'", node.kind())
172
    };
173

            
174
    Diagnostic {
175
        range: Range {
176
            start: Position {
177
                line: start.row as u32,
178
                character: start.column as u32,
179
            },
180
            end: Position {
181
                line: end.row as u32,
182
                character: end.column as u32,
183
            },
184
        },
185
        severity: Severity::Error,
186
        message,
187
        source: "syntactic-error-detector",
188
    }
189
}
190

            
191
fn classify_unexpected_token_error(node: Node, source_code: &str) -> String {
192
    if let Some(parent) = node.parent() {
193
        let src_token = &source_code[node.start_byte()..node.end_byte()];
194

            
195
        // Unexpected token at the end of a statement
196
        if parent.kind() == "program" {
197
            // Save cursor position
198
            if let Some(prev_sib) = node.prev_sibling().and_then(|n| n.prev_sibling()) {
199
                format!(
200
                    "Unexpected '{}' at the end of '{}'",
201
                    src_token,
202
                    prev_sib.kind()
203
                )
204
            } else {
205
                format!("Unexpected '{}' ", src_token)
206
            }
207
        } else {
208
            format!("Unexpected '{}' inside '{}'", src_token, parent.kind())
209
        }
210
    // Error at root node (program)
211
    } else {
212
        format!("Unexpected '{}", source_code)
213
    }
214
}
215

            
216
/// Classifies a general syntax error that cannot be classified with other functions.
217
fn classify_general_syntax_error(node: Node) -> String {
218
    if let Some(parent) = node.parent() {
219
        format!(
220
            "Syntax error in '{}': unexpected or invalid '{}'.",
221
            parent.kind(),
222
            node.kind()
223
        )
224
    } else {
225
        format!("Syntax error: unexpected or invalid '{}'.", node.kind())
226
    }
227
}
228

            
229
/// Helper function for tests to compare the actual diagnostic with the expected one.
230
pub fn check_diagnostic(
231
    diag: &Diagnostic,
232
    line_start: u32,
233
    char_start: u32,
234
    line_end: u32,
235
    char_end: u32,
236
    msg: &str,
237
) {
238
    // Checking range
239
    assert_eq!(diag.range.start.line, line_start);
240
    assert_eq!(diag.range.start.character, char_start);
241
    assert_eq!(diag.range.end.line, line_end);
242
    assert_eq!(diag.range.end.character, char_end);
243

            
244
    // Check the message
245
    assert_eq!(diag.message, msg);
246
}
247

            
248
/// A stray `;` before the first statement is expected to be reported as a failure to
/// read the source, with a range covering the whole 19-character line.
#[test]
fn error_at_start() {
    let found = detect_syntactic_errors("; find x: int(1..3)");
    assert!(!found.is_empty(), "Expected at least one diagnostic");
    check_diagnostic(&found[0], 0, 0, 0, 19, "Failed to read the source code");
}