1
use crate::diagnostics::diagnostics_api::SymbolKind;
2
use crate::diagnostics::source_map::SourceMap;
3
use crate::parser::syntax_errors::line_start_byte;
4

            
5
// Semantic token types, encoded as indices into the token-type legend sent
// to the LSP client. NOTE(review): these look like positions in the legend
// registered during server initialization — the ordering here and there must
// stay in sync; confirm against the initialization code.
pub const TOKEN_TYPE_NUMBER: u32 = 0;
pub const TOKEN_TYPE_FUNCTION: u32 = 1;
pub const TOKEN_TYPE_VARIABLE: u32 = 2;
pub const TOKEN_TYPE_LETTING: u32 = 3;
pub const TOKEN_TYPE_FIND: u32 = 4;
pub const TOKEN_TYPE_DOMAIN: u32 = 5;
pub const TOKEN_TYPE_LETTINGVAR: u32 = 6;
pub const TOKEN_TYPE_FINDVAR: u32 = 7;
pub const TOKEN_TYPE_GIVEN: u32 = 8;
pub const TOKEN_TYPE_GIVENVAR: u32 = 9;

// Semantic token modifiers, encoded as bit positions: a token's `modifiers`
// word is a bitset assembled with `1 << MODIFIER_*` (see `token_encoding`).
pub const MODIFIER_DECLARATION: u32 = 0;
pub const MODIFIER_READONLY: u32 = 1;
18

            
19
/// The (token type, modifier bitset) pair for a single semantic token,
/// using the `TOKEN_TYPE_*` legend indices and `MODIFIER_*` bit positions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TokenEncoding {
    /// Token type: one of the `TOKEN_TYPE_*` legend indices.
    pub ty: u32,
    /// Modifier bitset: an OR of `1 << MODIFIER_*` values (0 = none).
    pub modifiers: u32,
}
23

            
24
/// Number of UTF-16 code units needed to represent `bytes`.
///
/// Invalid UTF-8 is decoded lossily, so each replacement character produced
/// by a bad sequence counts as one UTF-16 unit.
fn utf16_units(bytes: &[u8]) -> u32 {
    let text = String::from_utf8_lossy(bytes);
    let mut units = 0u32;
    for ch in text.chars() {
        units += ch.len_utf16() as u32;
    }
    units
}
27

            
28
/// Byte offsets at which each line of `source` begins.
///
/// Line 0 always starts at offset 0; every byte immediately after a `\n`
/// starts the next line (so a trailing newline yields a final empty line).
fn line_start_offsets(source: &[u8]) -> Vec<usize> {
    let after_newlines = source
        .iter()
        .enumerate()
        .filter(|&(_, &b)| b == b'\n')
        .map(|(idx, _)| idx + 1);
    std::iter::once(0usize).chain(after_newlines).collect()
}
37

            
38
/// Index of the line containing `byte`: the position of the last entry in
/// `line_starts` that is <= `byte` (0 when there is no such entry).
fn line_index_at_byte(line_starts: &[usize], byte: usize) -> usize {
    // `after` = number of line starts at or before `byte`.
    let after = line_starts.partition_point(|&start| start <= byte);
    if after == 0 {
        0
    } else {
        after - 1
    }
}
44

            
45
// maps kind in SourceMap into a TokenEncoding
46
pub fn token_encoding(kind: &SymbolKind) -> Option<TokenEncoding> {
47
    match kind {
48
        SymbolKind::Integer => Some(TokenEncoding {
49
            ty: TOKEN_TYPE_NUMBER,
50
            modifiers: 0,
51
        }),
52
        SymbolKind::Decimal => Some(TokenEncoding {
53
            ty: TOKEN_TYPE_NUMBER,
54
            modifiers: 0,
55
        }),
56
        SymbolKind::Function => Some(TokenEncoding {
57
            ty: TOKEN_TYPE_FUNCTION,
58
            modifiers: 0,
59
        }),
60
        SymbolKind::Variable => Some(TokenEncoding {
61
            ty: TOKEN_TYPE_VARIABLE,
62
            modifiers: 0,
63
        }),
64
        SymbolKind::Constant => Some(TokenEncoding {
65
            ty: TOKEN_TYPE_VARIABLE,
66
            modifiers: (1 << MODIFIER_READONLY),
67
        }),
68
        SymbolKind::Letting => Some(TokenEncoding {
69
            ty: TOKEN_TYPE_LETTING,
70
            modifiers: 0,
71
        }),
72
        SymbolKind::Find => Some(TokenEncoding {
73
            ty: TOKEN_TYPE_FIND,
74
            modifiers: 0,
75
        }),
76
        SymbolKind::Domain => Some(TokenEncoding {
77
            ty: TOKEN_TYPE_DOMAIN,
78
            modifiers: 0,
79
        }),
80
        SymbolKind::FindVar => Some(TokenEncoding {
81
            ty: TOKEN_TYPE_FINDVAR,
82
            modifiers: (1 << MODIFIER_DECLARATION),
83
        }),
84
        SymbolKind::LettingVar => Some(TokenEncoding {
85
            ty: TOKEN_TYPE_LETTINGVAR,
86
            modifiers: (1 << MODIFIER_DECLARATION),
87
        }),
88
        SymbolKind::Given => Some(TokenEncoding {
89
            ty: TOKEN_TYPE_GIVEN,
90
            modifiers: 0,
91
        }),
92
        SymbolKind::GivenVar => Some(TokenEncoding {
93
            ty: TOKEN_TYPE_GIVENVAR,
94
            modifiers: (1 << MODIFIER_DECLARATION),
95
        }),
96
    }
97
}
98

            
99
// translate span in SourceMap into the VSCode semantic token format
100
// NOTE: LSP semantic token positions and lengths are UTF-16 code units.
101
pub fn encode_semantic_tokens(source_map: &SourceMap, source: &str) -> Vec<u32> {
102
    let source_bytes = source.as_bytes();
103
    let line_starts = line_start_offsets(source_bytes);
104
    let mut entries: Vec<(u32, u32, u32, u32, u32)> = source_map
105
        .spans
106
        .iter()
107
        .filter_map(|span| {
108
            let kind = span.hover_info.as_ref()?.kind.as_ref()?;
109
            let enc = token_encoding(kind)?;
110

            
111
            let start_byte = span.start_byte;
112
            let end_byte = span.end_byte;
113
            if end_byte <= start_byte
114
                || end_byte > source_bytes.len()
115
                || start_byte > source_bytes.len()
116
            {
117
                return None;
118
            }
119

            
120
            let start_line = line_index_at_byte(&line_starts, start_byte);
121
            let end_line = line_index_at_byte(&line_starts, end_byte.saturating_sub(1));
122
            if start_line != end_line {
123
                // LSP semantic token entries should not span lines.
124
                return None;
125
            }
126

            
127
            let line_start = line_start_byte(source_bytes, start_line);
128
            if start_byte < line_start {
129
                return None;
130
            }
131

            
132
            let col = utf16_units(source_bytes.get(line_start..start_byte)?);
133
            let len = utf16_units(source_bytes.get(start_byte..end_byte)?);
134
            if len == 0 {
135
                return None;
136
            }
137
            Some((start_line as u32, col, len, enc.ty, enc.modifiers))
138
        })
139
        .collect();
140

            
141
    entries.sort_by_key(|&(line, col, _, _, _)| (line, col));
142

            
143
    let mut data = Vec::with_capacity(entries.len() * 5);
144
    let mut prev_line = 0u32;
145
    let mut prev_col = 0u32;
146

            
147
    for (line, col, len, ty, modifiers) in entries {
148
        let delta_line = line - prev_line;
149
        let delta_col = if delta_line == 0 { col - prev_col } else { col };
150
        data.extend_from_slice(&[delta_line, delta_col, len, ty, modifiers]);
151
        prev_line = line;
152
        prev_col = col;
153
    }
154

            
155
    data
156
}