Skip to main content

conjure_cp_essence_parser/diagnostics/error_detection/
syntactic_errors.rs

1use crate::diagnostics::diagnostics_api::{Diagnostic, Position, Range, Severity};
2use crate::parser::traversal::WalkDFS;
3use crate::parser::util::get_tree;
4use tree_sitter::Node;
5
6/// Helper function to see all the error nodes tree-sitter generated.
7/// Prints each error or missing node's.
8pub fn print_all_error_nodes(source: &str) {
9    if let Some((tree, _)) = get_tree(source) {
10        let root_node = tree.root_node();
11        println!("{}", root_node.to_sexp());
12        let mut stack = vec![root_node];
13        while let Some(node) = stack.pop() {
14            if node.is_error() || node.is_missing() || node.has_error() {
15                println!(
16                    "Error: '{}' [{}:{}-{}:{}] (children: {}) parent: {}",
17                    node.kind(),
18                    node.start_position().row,
19                    node.start_position().column,
20                    node.end_position().row,
21                    node.end_position().column,
22                    node.child_count(),
23                    node.parent()
24                        .map_or("None".to_string(), |p| p.kind().to_string())
25                );
26            }
27            for i in (0..node.child_count()).rev() {
28                if let Some(child) = node.child(i) {
29                    stack.push(child);
30                }
31            }
32        }
33    } else {
34        println!("[all errors] Could not parse source.");
35    }
36}
37
38/// Helper function
39pub fn print_diagnostics(diags: &[Diagnostic]) {
40    for (i, diag) in diags.iter().enumerate() {
41        println!(
42            "Diagnostic {}:\n  Range: ({}:{}) - ({}:{})\n  Severity: {:?}\n  Message: {}\n  Source: {}\n",
43            i + 1,
44            diag.range.start.line,
45            diag.range.start.character,
46            diag.range.end.line,
47            diag.range.end.character,
48            diag.severity,
49            diag.message,
50            diag.source
51        );
52    }
53}
54
55/// Returns true if the node's start or end column is out of range for its line in the source.
56fn error_node_out_of_range(node: &tree_sitter::Node, source: &str) -> bool {
57    let lines: Vec<&str> = source.lines().collect();
58    let start = node.start_position();
59    let end = node.end_position();
60
61    let start_line_len = lines.get(start.row).map_or(0, |l| l.len());
62    let end_line_len = lines.get(end.row).map_or(0, |l| l.len());
63
64    (start.column > start_line_len) || (end.column > end_line_len)
65}
66
67/// Detects syntactic issues in the essence source text and returns a vector of Diagnostics.
68///
69/// This function traverses the parse tree, looking for missing or error nodes, and generates
70/// diagnostics for each. It uses a DFS and skips children of error/missing nodes
71/// to avoid duplicate diagnostics. If the source cannot be parsed, a diagnostic is returned for that.
72///
73/// # Arguments
74/// * `source` - The source code to analyze.
75///
76/// # Returns
77/// * `Vec<Diagnostic>` - A vector of diagnostics describing syntactic issues found in the source.
78pub fn detect_syntactic_errors(source: &str) -> Vec<Diagnostic> {
79    let mut diagnostics = Vec::new();
80
81    let (tree, _) = match get_tree(source) {
82        Some(tree) => tree,
83        None => {
84            let last_line = source.lines().count().saturating_sub(1);
85            let last_char = source.lines().last().map(|l| l.len()).unwrap_or(0);
86            diagnostics.push(Diagnostic {
87                range: Range {
88                    start: Position {
89                        line: 0,
90                        character: 0,
91                    },
92                    end: Position {
93                        line: last_line as u32,
94                        character: last_char as u32,
95                    },
96                },
97                severity: Severity::Error,
98                message: "Failed to read the source code".to_string(),
99                source: "Tree-Sitter-Parse-Error",
100            });
101            return diagnostics;
102        }
103    };
104
105    let root_node = tree.root_node();
106    // Retract (do not descend) if the node is missing, error, or their parent is missing/error
107    let retract = |node: &tree_sitter::Node| {
108        node.is_missing() || node.is_error() || node.start_position() == node.end_position()
109    };
110
111    for node in WalkDFS::with_retract(&root_node, &retract) {
112        // Tree-sitter sometimes fails to insert a MISSING node, do a range check to be sure
113        if node.start_position() == node.end_position() {
114            diagnostics.push(classify_missing_token(node));
115            continue;
116        }
117        // Only classify error nodes whose parent is not error/missing
118        if (node.is_error())
119            && !node
120                .parent()
121                .is_some_and(|p| p.is_error() || p.is_missing())
122        {
123            diagnostics.push(classify_syntax_error(node, source));
124            continue;
125        }
126    }
127
128    diagnostics
129}
130
131/// Classifies a syntax error node and returns a diagnostic for it.
132fn classify_syntax_error(node: Node, source: &str) -> Diagnostic {
133    if node.is_missing() {
134        classify_missing_token(node)
135    } else if node.is_error() {
136        classify_unexpected_token_error(node, source)
137    } else {
138        classify_general_syntax_error(node)
139    }
140}
141
142/// Classifies a missing token node and generates a diagnostic with a context-aware message.
143fn classify_missing_token(node: Node) -> Diagnostic {
144    let start = node.start_position();
145    let end = node.end_position();
146
147    let message = if let Some(parent) = node.parent() {
148        match parent.kind() {
149            "letting_statement" => "Missing 'expression or domain'".to_string(),
150            "and_expr" => "Missing right operand in 'and' expression".to_string(),
151            "comparison_expr" => "Missing right operand in 'comparison' expression".to_string(),
152            _ => format!("Missing '{}'", node.kind()),
153        }
154    } else {
155        format!("Missing '{}'", node.kind())
156    };
157
158    Diagnostic {
159        range: Range {
160            start: Position {
161                line: start.row as u32,
162                character: start.column as u32,
163            },
164            end: Position {
165                line: end.row as u32,
166                character: end.column as u32,
167            },
168        },
169        severity: Severity::Error,
170        message,
171        source: "syntactic-error-detector",
172    }
173}
174fn classify_unexpected_token_error(node: Node, source_code: &str) -> Diagnostic {
175    let (message, whole_line, line_index) = if let Some(parent) = node.parent() {
176        let start_byte = node.start_byte().min(source_code.len());
177        let end_byte = node.end_byte().min(source_code.len());
178        let src_token = &source_code[start_byte..end_byte];
179
180        // Malformed entire lines
181        // Tree-sitter cannot apply any grammar rule to a line
182
183        // ERROR node is the direct child of the root node
184        if parent.kind() == "program" {
185            let li = node.start_position().row;
186            let line_text = source_code.lines().nth(li).unwrap_or("");
187
188            // happens when the malformed line is the first
189            // Tree-sitter places the error node out of range, needs separate handling
190            if error_node_out_of_range(&node, source_code) || node.start_position().column == 0 {
191                (
192                    format!("Malformed line {}: '{}'", li + 1, line_text),
193                    true,
194                    li,
195                )
196
197            // Unexpected tokens
198
199            // Tree-sitter classified a line but found unexpected token at the end of it
200            } else if let Some(prev_sib) = node.prev_sibling().and_then(|n| n.prev_sibling()) {
201                (
202                    format!(
203                        "Unexpected '{}' at the end of '{}'",
204                        src_token,
205                        prev_sib.kind()
206                    ),
207                    false,
208                    li,
209                )
210            } else {
211                (format!("Unexpected '{}'", src_token), false, li)
212            }
213        // Unexpected tokens inside constructs
214        } else {
215            (
216                format!("Unexpected '{}' inside '{}'", src_token, parent.kind()),
217                false,
218                0,
219            )
220        }
221    } else {
222        (format!("Unexpected '{}'", source_code), false, 0)
223    };
224
225    // compute range once based on whole_line flag or node positions
226    let (start, end) = if whole_line {
227        let li = line_index;
228        let line_text = source_code.lines().nth(li).unwrap_or("");
229        (
230            Position {
231                line: li as u32,
232                character: 0,
233            },
234            Position {
235                line: li as u32,
236                character: line_text.len() as u32,
237            },
238        )
239    } else {
240        (
241            Position {
242                line: node.start_position().row as u32,
243                character: node.start_position().column as u32,
244            },
245            Position {
246                line: node.end_position().row as u32,
247                character: node.end_position().column as u32,
248            },
249        )
250    };
251
252    Diagnostic {
253        range: Range { start, end },
254        severity: Severity::Error,
255        message,
256        source: "syntactic-error-detector",
257    }
258}
259
260/// Classifies a general syntax error that cannot be classified with other functions.
261fn classify_general_syntax_error(node: Node) -> Diagnostic {
262    let message = if let Some(parent) = node.parent() {
263        format!(
264            "Syntax error in '{}': unexpected or invalid '{}'.",
265            parent.kind(),
266            node.kind()
267        )
268    } else {
269        format!("Syntax error: unexpected or invalid '{}'.", node.kind())
270    };
271
272    Diagnostic {
273        range: Range {
274            start: Position {
275                line: node.start_position().row as u32,
276                character: node.start_position().column as u32,
277            },
278            end: Position {
279                line: node.end_position().row as u32,
280                character: node.end_position().column as u32,
281            },
282        },
283        severity: Severity::Error,
284        message,
285        source: "syntactic-error-detector",
286    }
287}
288
289/// Helper function for tests to compare the actual diagnostic with the expected one.
290pub fn check_diagnostic(
291    diag: &Diagnostic,
292    line_start: u32,
293    char_start: u32,
294    line_end: u32,
295    char_end: u32,
296    msg: &str,
297) {
298    // Checking range
299    assert_eq!(diag.range.start.line, line_start);
300    assert_eq!(diag.range.start.character, char_start);
301    assert_eq!(diag.range.end.line, line_end);
302    assert_eq!(diag.range.end.character, char_end);
303
304    // Check the message
305    assert_eq!(diag.message, msg);
306}
307
#[test]
fn error_at_start() {
    // The expected message and the 0:0 - 0:19 range (the full 19-character line) are those
    // of the fallback diagnostic `detect_syntactic_errors` emits when `get_tree` yields no tree.
    let input = "; find x: int(1..3)";
    let reported = detect_syntactic_errors(input);
    assert!(!reported.is_empty(), "Expected at least one diagnostic");
    check_diagnostic(&reported[0], 0, 0, 0, 19, "Failed to read the source code");
}