lang_jsonld/lang/
parser.rs

1use std::{
2    io::{self, Write},
3    ops::Range,
4};
5
6use chumsky::{prelude::*, Error, Parser, Stream};
7use lsp_core::prelude::{spanned, MyTerm, Spanned, Token};
8use Token::*;
9
10struct ObjectMemberManager<'a> {
11    out: Vec<Spanned<ObjectMember>>,
12    full_start: usize,
13    start: usize,
14
15    seen_comma: bool,
16    seen_colon: bool,
17
18    current_key: Option<Spanned<Token>>,
19    current_value: Option<Spanned<Json>>,
20    emit: &'a mut dyn FnMut(Simple<Token>),
21}
22
23impl<'a> ObjectMemberManager<'a> {
24    fn new(span: &Range<usize>, emit: &'a mut dyn FnMut(Simple<Token>)) -> Self {
25        Self {
26            out: vec![],
27            full_start: span.start,
28            start: span.start,
29            seen_comma: false,
30            seen_colon: false,
31            current_key: None,
32            current_value: None,
33            emit,
34        }
35    }
36
37    #[allow(unused)]
38    fn print(&self) {
39        println!(
40            "key {:?} value {:?} (out {} len) (start {} full start {})",
41            self.current_key.as_ref().map(|x| x.value()),
42            self.current_value.as_ref().map(|x| x.value()),
43            self.out.len(),
44            self.start,
45            self.full_start
46        )
47    }
48
49    fn invalid(&mut self, span: Range<usize>) -> Spanned<Token> {
50        (self.emit)(Simple::custom(span.clone(), "Expected valid token"));
51        Spanned(Token::Invalid("".to_string()), span)
52    }
53
54    fn invalid_json(&mut self, span: Range<usize>) -> Spanned<Json> {
55        (self.emit)(Simple::custom(span.clone(), "Expected valid json"));
56        Spanned(Json::Invalid, span)
57    }
58
59    fn eat_json(&mut self, part: Spanned<Json>) {
60        if self.current_key.is_none() {
61            let span = part.span().clone();
62            match part {
63                Spanned(Json::Token(t), span) => {
64                    self.current_key = Some(Spanned(t, span));
65                }
66                x => {
67                    self.current_key = Some(self.invalid(self.start..span.start));
68                    self.current_value = Some(x);
69                }
70            }
71
72            self.full_start = span.start;
73            self.start = span.end + 1;
74            return;
75        }
76
77        if self.current_value.is_none() {
78            if !self.seen_colon {
79                (self.emit)(Simple::custom(
80                    self.start - 1..self.start,
81                    "expected colon, didn't find one",
82                ));
83            }
84
85            self.start = part.span().end + 1;
86            self.current_value = Some(part);
87            return;
88        }
89
90        // We didn't expect to flush a thing, but we did
91        self.flush(self.full_start..part.span().end, false);
92        self.eat_json(part);
93    }
94
95    fn eat_token(&mut self, token: Spanned<Token>) {
96        match token {
97            Spanned(Token::Colon, span) => {
98                (self.current_key, self.current_value) =
99                    match (self.current_key.take(), self.current_value.take()) {
100                        (Some(k), Some(Spanned(Json::Token(k2), r))) => {
101                            self.current_key = Some(k);
102                            // self.current_value = Some(self.invalid_json(r.clone()));
103                            self.flush(span.clone(), false);
104                            (Some(Spanned(k2, r)), None)
105                        }
106                        (k, v) => (k, v),
107                    };
108                if self.seen_colon {
109                    (self.emit)(Simple::custom(
110                        span.clone(),
111                        "Unexepected colon, already seen one",
112                    ));
113                }
114                self.seen_colon = true;
115                // we expect to set the second part
116                if self.current_key.is_none() {
117                    self.current_key = Some(self.invalid(self.start..span.start));
118                }
119                self.start = span.end;
120            }
121            Spanned(Token::Comma, span) => {
122                if self.seen_comma {
123                    (self.emit)(Simple::custom(
124                        span.clone(),
125                        "Unexepected comma, already seen one",
126                    ));
127                }
128                self.seen_comma = true;
129                self.flush(span, false);
130            }
131            Spanned(x, s) => {
132                (self.emit)(Simple::expected_input_found(
133                    s,
134                    [Some(Token::Colon), Some(Token::Comma)],
135                    Some(x),
136                ));
137            }
138        }
139    }
140    fn flush(&mut self, span: Range<usize>, end: bool) {
141        if !end && !self.seen_comma {
142            (self.emit)(Simple::custom(
143                span.end - 1..span.end,
144                "Expected comma, but didn't find one",
145            ))
146        }
147        let k = match self.current_key.take() {
148            Some(k) => k,
149            None => self.invalid(span.clone()),
150        };
151        let v = match self.current_value.take() {
152            Some(v) => v,
153            None => self.invalid_json(span.clone()),
154        };
155        self.out
156            .push(Spanned(ObjectMember::Full(k, v), self.full_start..span.end));
157        self.start = span.end + 1;
158        self.full_start = span.end + 1;
159        self.seen_colon = false;
160        self.seen_comma = false;
161    }
162}
163
164#[derive(Clone, PartialEq, Debug)]
165pub enum ObjectMember {
166    Full(Spanned<Token>, Spanned<Json>),
167    Partial(Spanned<Token>, Option<Spanned<()>>, Option<Spanned<Json>>),
168}
169impl ObjectMember {
170    pub fn field(&self) -> &Spanned<Token> {
171        match self {
172            ObjectMember::Full(spanned, _) => spanned,
173            ObjectMember::Partial(spanned, _, _) => spanned,
174        }
175    }
176
177    pub fn json_value(&self) -> Option<&Spanned<Json>> {
178        match self {
179            ObjectMember::Full(_, spanned) => Some(spanned),
180            ObjectMember::Partial(_, _, spanned) => spanned.as_ref(),
181        }
182    }
183}
184
185#[derive(Clone, PartialEq, Debug)]
186pub enum Json {
187    Invalid,
188    Token(Token),
189    Array(Vec<Spanned<Json>>),
190    Object(Vec<Spanned<ObjectMember>>),
191}
192
193impl Json {
194    pub fn extract_triples(&self) -> Vec<MyTerm<'static>> {
195        Vec::new()
196    }
197    pub fn token(&self) -> Option<&Token> {
198        match self {
199            Json::Token(t) => Some(t),
200            _ => None,
201        }
202    }
203}
204
205pub struct JsonFormatter {
206    pub indent: String,
207    pub inc: usize,
208}
209impl JsonFormatter {
210    pub fn inc(&mut self) {
211        self.inc += 1;
212    }
213
214    pub fn decr(&mut self) {
215        self.inc -= 1;
216    }
217
218    pub fn line(&mut self, writer: &mut impl Write) -> io::Result<()> {
219        write!(writer, "\n")?;
220        for _ in 0..self.inc {
221            write!(writer, "{}", &self.indent)?;
222        }
223        Ok(())
224    }
225
226    pub fn format(&mut self, json: &Json, writer: &mut impl Write) -> io::Result<()> {
227        use std::io::{Error, ErrorKind};
228        match json {
229            Json::Invalid => {
230                return Result::Err(Error::new(ErrorKind::Other, "cannot format invalid json"))
231            }
232            Json::Token(t) => write!(writer, "{}", t)?,
233            Json::Array(xs) => {
234                write!(writer, "[")?;
235                self.inc();
236                self.line(writer)?;
237                let mut first = true;
238                for t in xs {
239                    if !first {
240                        write!(writer, ",")?;
241                        self.line(writer)?;
242                    }
243                    self.format(&t.0, writer)?;
244                    first = false;
245                }
246                self.decr();
247                self.line(writer)?;
248                write!(writer, "]")?;
249            }
250            Json::Object(xs) => {
251                write!(writer, "{{")?;
252                self.inc();
253                self.line(writer)?;
254                let mut first = true;
255                for t in xs {
256                    if !first {
257                        write!(writer, ",")?;
258                        self.line(writer)?;
259                    }
260                    match &t.0 {
261                        ObjectMember::Full(x, y) => {
262                            write!(writer, "{}: ", x.0)?;
263                            self.format(y, writer)?;
264                        }
265                        ObjectMember::Partial(_, _, _) => {
266                            return Result::Err(Error::new(
267                                ErrorKind::Other,
268                                "cannot format invalid json",
269                            ))
270                        }
271                    }
272                    first = false;
273                }
274                self.decr();
275                self.line(writer)?;
276                write!(writer, "}}")?;
277            }
278        }
279        Ok(())
280    }
281}
282
283impl Default for Json {
284    fn default() -> Self {
285        Self::Invalid
286    }
287}
288
289pub fn parse(source: &str, tokens: Vec<Spanned<Token>>) -> (Spanned<Json>, Vec<Simple<Token>>) {
290    let stream = Stream::from_iter(
291        0..source.len() + 1,
292        tokens.into_iter().map(|Spanned(x, s)| (x, s)),
293    );
294
295    let parser = parser().then_ignore(end().recover_with(skip_then_retry_until([])));
296    let (json, json_errors) = parser.parse_recovery(stream);
297
298    (
299        json.unwrap_or(Spanned(Json::Invalid, 0..source.len())),
300        json_errors,
301    )
302}
303
304type S = std::ops::Range<usize>;
305fn expect_token(
306    token: Token,
307    not_allowed: Token,
308) -> impl Parser<Token, Token, Error = Simple<Token, S>> + Clone {
309    just(token.clone()).or(none_of([token.clone(), not_allowed]).rewind().validate(
310        move |x, span: S, emit| {
311            emit(Simple::expected_input_found(
312                span,
313                [Some(token.clone())],
314                Some(x),
315            ));
316            token.clone()
317        },
318    ))
319}
320
321fn parser() -> impl Parser<Token, Spanned<Json>, Error = Simple<Token>> {
322    recursive(|value| {
323        let array = value
324            .clone()
325            .separated_by(expect_token(Token::Comma, Token::SqClose))
326            .delimited_by(just(SqOpen), just(SqClose))
327            .map(Json::Array)
328            .labelled("array");
329
330        // let array = just(SqOpen).ignore_then(value.clone().separated_by(just(Comma))).then_ignore(just(SqClose)).map(Json::Array);
331
332        let member_part = value
333            .map(Result::Ok)
334            .or(one_of([Token::Comma, Token::Colon])
335                .map_with_span(spanned)
336                .map(Result::Err));
337        // let member_value = just(Token::Colon).ignore_then(value.clone());
338        // let member = filter(Token::is_str)
339        //     .map_with_span(spanned)
340        //     .then(member_value.or())
341        //     .validate(|(s, o), span, emit| match o {
342        //         Some(o) => ObjectMember::Full(s, o),
343        //         None => {
344        //             emit(Simple::custom(span, "Erroneous object member"));
345        //             ObjectMember::Partial(s, None, None)
346        //         }
347        //     })
348        //     .labelled("object member");
349
350        let obj = just(CurlOpen)
351            .ignore_then(member_part.repeated().validate(|parts, span, emit| {
352                let mut manager = ObjectMemberManager::new(&span, emit);
353
354                for part in parts {
355                    // manager.print();
356                    match part {
357                        Ok(e) => manager.eat_json(e),
358                        Err(e) => manager.eat_token(e),
359                    }
360                }
361                // manager.print();
362                manager.flush(span, true);
363                manager.out
364            }))
365            .then_ignore(just(CurlClose))
366            .map(Json::Object)
367            .labelled("object");
368
369        // let obj = member
370        //     .map_with_span(spanned)
371        //     .separated_by(just(Comma).recover_with(skip_then_retry_until([])))
372        //     .delimited_by(just(CuOpen), just(CuClose))
373        //     .map(Json::Object);
374
375        let leaves = chumsky::prelude::select! {
376            Null => Json::Token(Null),
377            True => Json::Token(True),
378            False => Json::Token(False),
379            Token::Str(x, st) => Json::Token(Token::Str(x, st)),
380            Token::Number(n) => Json::Token(Token::Number(n)),
381        }
382        .labelled("leaf");
383
384        choice((array, obj, leaves))
385            // .map(std::result::Result::Ok)
386            // .or(any().map(std::result::Result::Err))
387            // .validate(|t, span, emit| match t {
388            //     Ok(x) => x,
389            //     Err(v) => {
390            //         emit(Simple::custom(span, format!("Expected JSON found {:?}", v)));
391            //         Json::Invalid
392            //     }
393            // })
394            .map_with_span(spanned)
395    })
396}
397
#[cfg(test)]
mod tests {
    use lsp_core::prelude::StringStyle;

    use super::*;
    use crate::lang::tokenizer::tokenize;

    /// Unwraps a parsed document that is expected to be an object.
    fn object_members(json: Spanned<Json>) -> Vec<Spanned<ObjectMember>> {
        match json.into_value() {
            Json::Object(members) => members,
            other => panic!("Expected json object, found {:?}", other),
        }
    }

    /// A lone string literal parses to a single string token.
    #[test]
    fn parse_json_simple() {
        let source = "\"test\"";
        let (tokens, token_errors) = tokenize(source);
        let (json, json_errors) = parse(source, tokens);

        assert!(token_errors.is_empty());
        assert!(json_errors.is_empty());

        let expected = Json::Token(Token::Str("test".into(), StringStyle::Double));
        assert_eq!(json.into_value(), expected);
    }

    /// A well-formed array keeps its elements in order.
    #[test]
    fn parse_json_array() {
        let source = "[\"test\", 42]";
        let (tokens, token_errors) = tokenize(source);
        let (json, json_errors) = parse(source, tokens);

        assert!(token_errors.is_empty());
        assert!(json_errors.is_empty());

        let arr: Vec<_> = match json.into_value() {
            Json::Array(items) => items.into_iter().map(|item| item.into_value()).collect(),
            _ => panic!("Expected json array"),
        };

        let expected = vec![
            Json::Token(Token::Str("test".into(), StringStyle::Double)),
            Json::Token(Token::Number("42".into())),
        ];
        assert_eq!(arr, expected);
    }

    /// A missing comma between two members yields one error and both members.
    #[test]
    fn parse_json_object_no_comma() {
        let source = r#"{
  "@type": "foaf:Document"
  "foaf:topic": "foaf:Document"
}"#;

        let (tokens, token_errors) = tokenize(source);
        assert_eq!(token_errors, vec![]);

        let (json, json_errors) = parse(source, tokens);

        println!("json errors {:?}", json_errors);
        assert_eq!(json_errors.len(), 1, "One json error");

        let members = object_members(json);
        assert_eq!(members.len(), 2);
    }

    /// A key followed by a colon but no value still yields two members.
    #[test]
    fn parse_json_object_no_value() {
        let source = r#"{
  "something":
  "foaf:topic": "foaf:Document"
}"#;

        let (tokens, token_errors) = tokenize(source);
        assert_eq!(token_errors, vec![]);

        let (json, json_errors) = parse(source, tokens);

        json_errors
            .iter()
            .for_each(|e| println!("json errors {:?}", e));

        let members = object_members(json);
        assert_eq!(members.len(), 2);

        assert_eq!(
            json_errors.len(),
            2,
            "Erroneous object member and expected comma"
        );
    }

    /// A bare key without colon or value still yields two members.
    #[test]
    fn parse_json_object_no_colon_value() {
        let source = r#"{
  "something"
  "foaf:topic": "foaf:Document"
}"#;

        let (tokens, token_errors) = tokenize(source);
        assert_eq!(token_errors, vec![]);

        let (json, json_errors) = parse(source, tokens);

        json_errors
            .iter()
            .for_each(|e| println!("json errors {:?}", e));

        let members = object_members(json);
        assert_eq!(members.len(), 2);

        json_errors.iter().for_each(|e| println!("e {:?}", e));

        assert_eq!(
            json_errors.len(),
            3,
            "Erroneous object member and expected comma"
        );
    }

    /// Stray punctuation inside an array should be skipped (currently ignored).
    #[ignore]
    #[test]
    fn parse_json_array_invalid() {
        let source = "[\"test\" :  , 42 ]";
        let (tokens, token_errors) = tokenize(source);
        let (json, json_errors) = parse(source, tokens);

        assert!(token_errors.is_empty());

        println!("Error: {:?}", json_errors);
        let arr: Vec<_> = match json.into_value() {
            Json::Array(items) => items.into_iter().map(|item| item.into_value()).collect(),
            other => panic!("Expected json array, got {:?}", other),
        };

        let expected = vec![
            Json::Token(Token::Str("test".into(), StringStyle::Double)),
            Json::Token(Token::Number("42".to_string())),
        ];
        assert_eq!(arr, expected);
    }

    /// Regression test: a valid nested document must parse without errors.
    #[test]
    fn parse_failed() {
        let source = r#"
{
  "@context": [
    "https://data.vlaanderen.be/doc/applicatieprofiel/sensoren-en-bemonstering/kandidaatstandaard/2022-04-28/context/ap-sensoren-en-bemonstering.jsonld",
    {
      "foaf": "foaf_exp"
    } 
  ], "test": "test_exp"
}
"#;

        let (tokens, token_errors) = tokenize(source);
        let (_, json_errors) = parse(source, tokens);

        assert!(token_errors.is_empty());
        assert_eq!(json_errors.len(), 0);
    }
}
571}