conjure_cp_essence_parser/diagnostics/error_detection/
syntactic_errors.rs

1use crate::diagnostics::diagnostics_api::{Diagnostic, Position, Range, Severity};
2use crate::parser::traversal::WalkDFS;
3use crate::parser::util::get_tree;
4use tree_sitter::Node;
5
6/// Helper function to see all the error nodes tree-sitter generated.
7/// Prints each error or missing node's.
8pub fn print_all_error_nodes(source: &str) {
9    if let Some((tree, _)) = get_tree(source) {
10        let root_node = tree.root_node();
11        println!("{}", root_node.to_sexp());
12        let mut stack = vec![root_node];
13        while let Some(node) = stack.pop() {
14            if node.is_error() || node.is_missing() {
15                println!(
16                    "[all errors] Error node: '{}' [{}:{}-{}:{}] (children: {})",
17                    node.kind(),
18                    node.start_position().row,
19                    node.start_position().column,
20                    node.end_position().row,
21                    node.end_position().column,
22                    node.child_count()
23                );
24            }
25            for i in (0..node.child_count()).rev() {
26                if let Some(child) = node.child(i) {
27                    stack.push(child);
28                }
29            }
30        }
31    } else {
32        println!("[all errors] Could not parse source.");
33    }
34}
35
36/// Helper function
37pub fn print_diagnostics(diags: &[Diagnostic]) {
38    for (i, diag) in diags.iter().enumerate() {
39        println!(
40            "Diagnostic {}:\n  Range: ({}:{}) - ({}:{})\n  Severity: {:?}\n  Message: {}\n  Source: {}\n",
41            i + 1,
42            diag.range.start.line,
43            diag.range.start.character,
44            diag.range.end.line,
45            diag.range.end.character,
46            diag.severity,
47            diag.message,
48            diag.source
49        );
50    }
51}
52
53/// Detects syntactic issues in the essence source text and returns a vector of Diagnostics.
54///
55/// This function traverses the parse tree, looking for missing or error nodes, and generates
56/// diagnostics for each. It uses a DFS and skips children of error/missing nodes
57/// to avoid duplicate diagnostics. If the source cannot be parsed, a diagnostic is returned for that.
58///
59/// # Arguments
60/// * `source` - The source code to analyze.
61///
62/// # Returns
63/// * `Vec<Diagnostic>` - A vector of diagnostics describing syntactic issues found in the source.
64pub fn detect_syntactic_errors(source: &str) -> Vec<Diagnostic> {
65    let mut diagnostics = Vec::new();
66
67    let (tree, _) = match get_tree(source) {
68        Some(tree) => tree,
69        None => {
70            let last_line = source.lines().count().saturating_sub(1);
71            let last_char = source.lines().last().map(|l| l.len()).unwrap_or(0);
72            diagnostics.push(Diagnostic {
73                range: Range {
74                    start: Position {
75                        line: 0,
76                        character: 0,
77                    },
78                    end: Position {
79                        line: last_line as u32,
80                        character: last_char as u32,
81                    },
82                },
83                severity: Severity::Error,
84                message: "Failed to read the source code".to_string(),
85                source: "Tree-Sitter-Parse-Error",
86            });
87            return diagnostics;
88        }
89    };
90
91    let root_node = tree.root_node();
92    // Retract (do not descend) if the node is missing, error, or their parent is missing/error
93    let retract = |node: &tree_sitter::Node| {
94        node.is_missing() || node.is_error() || node.start_position() == node.end_position()
95    };
96
97    for node in WalkDFS::with_retract(&root_node, &retract) {
98        // Tree-sitter sometimes fails to insert a MISSING node, do a range check to be sure
99        if node.start_position() == node.end_position() {
100            diagnostics.push(classify_missing_token(node));
101            continue;
102        }
103
104        // Only classify error nodes whose parent is not error/missing
105        if node.is_error()
106            && !node
107                .parent()
108                .is_some_and(|p| p.is_error() || p.is_missing())
109        {
110            diagnostics.push(classify_syntax_error(node, source));
111            continue;
112        }
113    }
114
115    diagnostics
116}
117
118/// Classifies a syntax error node and returns a diagnostic for it.
119fn classify_syntax_error(node: Node, source: &str) -> Diagnostic {
120    let (start, end) = (node.start_position(), node.end_position());
121
122    if node.is_missing() {
123        return classify_missing_token(node);
124    }
125
126    let message = if is_unexpected_token(node) {
127        // If no children (exept the token itself) - unexpected token
128        classify_unexpected_token_error(node, source)
129    } else {
130        classify_general_syntax_error(node)
131    };
132    Diagnostic {
133        range: Range {
134            start: Position {
135                line: start.row as u32,
136                character: start.column as u32,
137            },
138            end: Position {
139                line: end.row as u32,
140                character: end.column as u32,
141            },
142        },
143        severity: Severity::Error,
144        message,
145        source: "syntactic-error-detector",
146    }
147}
148
149/// When an unexpected sybmol is part of the grammar - token, CST produces one ERROR node
150/// If not part of the grammar - two nested ERROR nodes.
151/// For misplaces integers - one ERROR node with no children, for everything else, one child node
152/// !is_named() is used to detect string literals
153fn is_unexpected_token(node: Node) -> bool {
154    node.child_count() == 0
155        || node.child_count() == 1
156            && (!node.child(0).unwrap().is_named() || (node.child(0).unwrap().is_error()))
157}
158/// Classifies a missing token node and generates a diagnostic with a context-aware message.
159fn classify_missing_token(node: Node) -> Diagnostic {
160    let start = node.start_position();
161    let end = node.end_position();
162
163    let message = if let Some(parent) = node.parent() {
164        match parent.kind() {
165            "letting_statement" => "Missing 'expression or domain'".to_string(),
166            "and_expr" => "Missing right operand in 'and' expression".to_string(),
167            "comparison_expr" => "Missing right operand in 'comparison' expression".to_string(),
168            _ => format!("Missing '{}'", node.kind()),
169        }
170    } else {
171        format!("Missing '{}'", node.kind())
172    };
173
174    Diagnostic {
175        range: Range {
176            start: Position {
177                line: start.row as u32,
178                character: start.column as u32,
179            },
180            end: Position {
181                line: end.row as u32,
182                character: end.column as u32,
183            },
184        },
185        severity: Severity::Error,
186        message,
187        source: "syntactic-error-detector",
188    }
189}
190
191fn classify_unexpected_token_error(node: Node, source_code: &str) -> String {
192    if let Some(parent) = node.parent() {
193        let src_token = &source_code[node.start_byte()..node.end_byte()];
194
195        // Unexpected token at the end of a statement
196        if parent.kind() == "program" {
197            // Save cursor position
198            if let Some(prev_sib) = node.prev_sibling().and_then(|n| n.prev_sibling()) {
199                format!(
200                    "Unexpected '{}' at the end of '{}'",
201                    src_token,
202                    prev_sib.kind()
203                )
204            } else {
205                format!("Unexpected '{}' ", src_token)
206            }
207        } else {
208            format!("Unexpected '{}' inside '{}'", src_token, parent.kind())
209        }
210    // Error at root node (program)
211    } else {
212        format!("Unexpected '{}", source_code)
213    }
214}
215
216/// Classifies a general syntax error that cannot be classified with other functions.
217fn classify_general_syntax_error(node: Node) -> String {
218    if let Some(parent) = node.parent() {
219        format!(
220            "Syntax error in '{}': unexpected or invalid '{}'.",
221            parent.kind(),
222            node.kind()
223        )
224    } else {
225        format!("Syntax error: unexpected or invalid '{}'.", node.kind())
226    }
227}
228
229/// Helper function for tests to compare the actual diagnostic with the expected one.
230pub fn check_diagnostic(
231    diag: &Diagnostic,
232    line_start: u32,
233    char_start: u32,
234    line_end: u32,
235    char_end: u32,
236    msg: &str,
237) {
238    // Checking range
239    assert_eq!(diag.range.start.line, line_start);
240    assert_eq!(diag.range.start.character, char_start);
241    assert_eq!(diag.range.end.line, line_end);
242    assert_eq!(diag.range.end.character, char_end);
243
244    // Check the message
245    assert_eq!(diag.message, msg);
246}
247
#[test]
fn error_at_start() {
    // A source starting with a stray `;` is expected to yield the whole-source
    // failure diagnostic, spanning line 0 from column 0 to column 19.
    let diagnostics = detect_syntactic_errors("; find x: int(1..3)");
    assert!(!diagnostics.is_empty(), "Expected at least one diagnostic");
    check_diagnostic(
        &diagnostics[0],
        0,
        0,
        0,
        19,
        "Failed to read the source code",
    );
}