lsp_core/systems/
lov.rs

1use std::{
2    borrow::Cow,
3    collections::{HashMap, HashSet},
4};
5
6use bevy_ecs::{prelude::*, world::CommandQueue};
7use lov::{LocalPrefix, LOCAL_PREFIXES};
8use serde::Deserialize;
9use sophia_api::{
10    prelude::{Any, Dataset},
11    quad::Quad,
12    term::{matcher::TermMatcher, Term as _},
13};
14use tracing::{debug, error, info, instrument, span};
15
16use super::prefix::PREFIX_CC;
17use crate::{
18    lsp_types::{TextDocumentItem, Url},
19    prelude::*,
20    util::{
21        fs::Fs,
22        ns::{owl, rdfs},
23    },
24};
25
26#[derive(Deserialize, Debug)]
27struct Version {
28    #[serde(rename = "fileURL")]
29    file_url: Option<String>,
30    issued: chrono::DateTime<chrono::Utc>,
31}
32
33#[derive(Deserialize, Debug)]
34struct Vocab {
35    versions: Vec<Version>,
36}
37
38pub fn populate_known_ontologies(mut commands: Commands) {
39    for lov in LOCAL_PREFIXES.iter() {
40        commands.spawn(lov.clone());
41    }
42
43    for (i, (prefix, url)) in PREFIX_CC
44        .split('\n')
45        .flat_map(|x| {
46            let mut s = x.split(' ');
47            let first = s.next()?;
48            let second = s.next()?;
49            Some((first.to_string(), second.to_string()))
50        })
51        .enumerate()
52    {
53        let pref: Cow<'static, str> = prefix.into();
54        let lov = LocalPrefix {
55            location: url.into(),
56            content: Cow::Borrowed(""),
57            name: pref.clone(),
58            title: pref,
59            rank: i + 2,
60        };
61
62        commands.spawn(lov.clone());
63    }
64}
65
66// Do we check whether or not the namespace url and the prefix url are the same?
67async fn extract_file_url(prefix: &str, client: &impl Client) -> Option<String> {
68    let url = format!(
69        "https://lov.linkeddata.es/dataset/lov/api/v2/vocabulary/info?vocab={}",
70        prefix
71    );
72    match client.fetch(&url, &std::collections::HashMap::new()).await {
73        Ok(resp) if resp.status == 200 => match serde_json::from_str::<Vocab>(&resp.body) {
74            Ok(x) => {
75                let versions: Vec<_> = x.versions.iter().flat_map(|x| &x.file_url).collect();
76                debug!(
77                    "Found lov response ({} versions) {:?}",
78                    x.versions.len(),
79                    versions
80                );
81                x.versions
82                    .into_iter()
83                    .flat_map(|x| x.file_url.map(|url| (url, x.issued)))
84                    .max_by_key(|x| x.1)
85                    .map(|x| x.0)
86            }
87            Err(e) => {
88                error!("Deserialize failed ({}) {:?}", url, e);
89                None
90            }
91        },
92        Ok(resp) => {
93            error!("Fetch ({}) failed status {}", url, resp.status);
94            None
95        }
96        Err(e) => {
97            error!("Fetch ({}) failed {:?}", url, e);
98            None
99        }
100    }
101}
102
103pub fn open_imports<C: Client + Resource>(
104    query: Query<(&Triples, &RopeC), Changed<Triples>>,
105    mut opened: Local<HashSet<String>>,
106    sender: Res<CommandSender>,
107    fs: Res<Fs>,
108    client: Res<C>,
109) {
110    for (triples, _) in &query {
111        for object in triples
112            .quads_matching(Any, [owl::imports], Any, Any)
113            .flatten()
114            .flat_map(|s| s.o().iri())
115            .flat_map(|s| Url::parse(s.as_str()))
116        {
117            if opened.contains(object.as_str()) {
118                continue;
119            }
120            opened.insert(object.as_str().to_string());
121
122            let fs = fs.clone();
123            let sender = sender.clone();
124            let fut = async move {
125                if let Some(content) = fs.0.read_file(&object).await {
126                    spawn_document(object, content, &sender.0, |_, _| {});
127
128                    let mut command_queue = CommandQueue::default();
129                    command_queue.push(move |world: &mut World| {
130                        world.run_schedule(SaveLabel);
131                    });
132                    let _ = sender.unbounded_send(command_queue);
133                } else {
134                    info!("No content found for {}", object);
135                }
136            };
137            client.spawn(fut);
138        }
139    }
140}
141
142/// First of al, fetch the lov dataset information at url https://lov.linkeddata.es/dataset/lov/api/v2/vocabulary/info?vocab=${prefix}
143/// Next, extract that json object into an object and find the latest dataset
144pub fn fetch_lov_properties<C: Client + Resource>(
145    sender: Res<CommandSender>,
146    query: Query<
147        &Prefixes,
148        (
149            Or<((Changed<Prefixes>, With<Open>), Changed<Open>)>,
150            // Without<Dirty>,
151        ),
152    >,
153    ontologies: Query<(Entity, &LocalPrefix)>,
154    mut prefixes: Local<HashSet<String>>,
155    client: Res<C>,
156    fs: Res<Fs>,
157) {
158    for prefs in &query {
159        for prefix in prefs.0.iter() {
160            if !prefixes.contains(prefix.url.as_str()) {
161                prefixes.insert(prefix.url.to_string());
162                if let Some(url) = fs.0.lov_url(prefix.url.as_str(), &prefix.prefix) {
163                    info!("Other virtual url {}", url);
164                    if let Some((e, local)) = ontologies
165                        .iter()
166                        .find(|(_, x)| x.location == prefix.url.as_str())
167                    {
168                        debug!("Local lov");
169
170                        let c = client.as_ref().clone();
171                        let sender = sender.0.clone();
172                        client.spawn(local_lov::<C>(local.clone(), url, sender, fs.clone(), c));
173                    } else {
174                        debug!("Remove lov");
175                        let sender = sender.0.clone();
176                        let c = client.as_ref().clone();
177                        client.spawn(fetch_lov(prefix.clone(), url, c, sender, fs.clone()));
178                    }
179                } else {
180                    debug!("Failed to find url");
181                }
182            } else {
183                debug!("Prefixes is already present {}", prefix.url);
184            }
185        }
186    }
187}
188
189type Sender = futures::channel::mpsc::UnboundedSender<CommandQueue>;
190fn spawn_document(
191    url: Url,
192    content: String,
193    sender: &Sender,
194    extra: impl FnOnce(Entity, &mut World) -> () + Send + Sync + 'static,
195) {
196    let mut command_queue = CommandQueue::default();
197    let item = TextDocumentItem {
198        version: 1,
199        uri: url.clone(),
200        language_id: String::from("turtle"),
201        text: String::new(),
202    };
203
204    let spawn = spawn_or_insert(
205        url.clone(),
206        (
207            RopeC(ropey::Rope::from_str(&content)),
208            Source(content.clone()),
209            Label(url.clone()), // this might crash
210            Wrapped(item),
211            Types(HashMap::new()),
212        ),
213        Some("turtle".into()),
214        (),
215    );
216
217    command_queue.push(move |world: &mut World| {
218        let span = span!(tracing::Level::INFO, "span lov");
219        let _enter = span.enter();
220        let e = spawn(world);
221
222        extra(e, world);
223
224        world.run_schedule(ParseLabel);
225        drop(_enter);
226    });
227
228    let _ = sender.unbounded_send(command_queue);
229}
230
231fn extra_from_lov<C: Client + Resource>(
232    from: FromPrefix,
233    content: String,
234    url: Url,
235    fs: Fs,
236) -> impl FnOnce(Entity, &mut World) + Send + Sync + 'static {
237    move |e, world| {
238        world.entity_mut(e).insert(from);
239
240        let client = world.resource::<C>();
241        client.spawn(async move {
242            fs.0.write_file(&url, &content).await;
243        });
244    }
245}
246
247async fn fetch_lov_body<C: Client + Resource>(prefix: &str, c: C) -> Option<String> {
248    if let Some(url) = extract_file_url(&prefix, &c).await {
249        match c.fetch(&url, &std::collections::HashMap::new()).await {
250            Ok(resp) if resp.status == 200 => return Some(resp.body),
251            Ok(resp) => {
252                error!("Fetch ({}) failed status {}", url, resp.status);
253            }
254            Err(e) => {
255                error!("Fetch ({}) failed {:?}", url, e);
256            }
257        }
258    }
259    None
260}
261async fn fetch_lov<C: Client + Resource>(prefix: Prefix, label: Url, c: C, sender: Sender, fs: Fs) {
262    if let Some(body) = fetch_lov_body(&prefix.prefix, c).await {
263        let extra = extra_from_lov::<C>(FromPrefix(prefix), body.clone(), label.clone(), fs);
264        spawn_document(label, body, &sender, extra);
265    }
266}
267
268// TODO: this should be spawned on the entity of the localprefix
269async fn local_lov<C: Client + Resource>(
270    local: lov::LocalPrefix,
271    label: Url,
272    sender: Sender,
273    fs: Fs,
274    c: C,
275) {
276    info!("Using local {}", local.name);
277    let content = if local.content.is_empty() {
278        info!("Fetching from LOV");
279        // This local is added by prefix, not by an actual local lov,
280        if let Some(body) = fetch_lov_body(&local.name, c).await {
281            Cow::Owned(body)
282        } else {
283            return;
284        }
285    } else {
286        local.content
287    };
288
289    let from = FromPrefix(Prefix {
290        prefix: local.name.to_string(),
291        url: Url::parse(&local.location).unwrap(),
292    });
293
294    let extra = extra_from_lov::<C>(from, content.to_string(), label.clone(), fs);
295    spawn_document(label, content.to_string(), &sender, extra);
296}
297
298#[derive(Component)]
299pub struct OntologyExtract;
300
301#[instrument(skip(commands))]
302pub fn init_onology_extractor(mut commands: Commands, fs: Res<Fs>) {
303    for local in lov::LOCAL_PREFIXES
304        .iter()
305        .filter(|x| ["rdf", "rdfs", "owl"].iter().any(|y| *y == x.name))
306    {
307        let url = fs.0.lov_url(&local.location, &local.name).unwrap();
308        info!("Virtual url {}", url.to_string());
309
310        // let url = crate::lsp_types::Url::from_str(local.location).unwrap();
311        let item = TextDocumentItem {
312            version: 1,
313            uri: url.clone(),
314            language_id: String::from("turtle"),
315            text: String::new(),
316        };
317
318        let spawn = spawn_or_insert(
319            url.clone(),
320            (
321                Source(local.content.to_string()),
322                RopeC(ropey::Rope::from_str(&local.content)),
323                Label(url),
324                Wrapped(item),
325                Types(HashMap::new()),
326            ),
327            Some("turtle".into()),
328            OntologyExtract,
329        );
330
331        info!("Init onology {}", local.name);
332        commands.queue(move |world: &mut World| {
333            info!("Spawned");
334            spawn(world);
335        });
336    }
337}
338
339#[instrument(skip(query, extractor))]
340pub fn check_added_ontology_extract(
341    query: Query<(&Triples, &Label), (Added<Triples>, With<OntologyExtract>)>,
342    mut extractor: ResMut<OntologyExtractor>,
343) {
344    let mut changed = false;
345    for (triples, label) in &query {
346        info!("Added triples from {}", label.as_str());
347        extractor.quads.extend(triples.0.iter().cloned());
348        changed = true;
349    }
350    if changed {
351        extractor.extract();
352    }
353}
354
355#[derive(Debug, Resource)]
356pub struct OntologyExtractor {
357    quads: Vec<MyQuad<'static>>,
358    properties: Vec<MyTerm<'static>>,
359    classes: Vec<MyTerm<'static>>,
360}
361
362struct LocalMatcher<'a> {
363    properties: &'a [MyTerm<'static>],
364}
365
366impl TermMatcher for LocalMatcher<'_> {
367    type Term = MyTerm<'static>;
368
369    fn matches<T2: sophia_api::prelude::Term + ?Sized>(&self, term: &T2) -> bool {
370        for p in self.properties {
371            if term.eq(p) {
372                return false;
373            }
374        }
375
376        true
377    }
378}
379
380impl OntologyExtractor {
381    pub fn new() -> Self {
382        Self {
383            quads: vec![],
384            classes: vec![MyTerm::<'static>::named_node(
385                "http://www.w3.org/2000/01/rdf-schema#Class",
386                0..1,
387            )],
388            properties: vec![MyTerm::<'static>::named_node(
389                "http://www.w3.org/1999/02/22-rdf-syntax-ns#Property",
390                0..1,
391            )],
392        }
393    }
394
395    pub fn properties<'a>(&'a self) -> &'a [MyTerm<'static>] {
396        &self.properties[..]
397    }
398
399    pub fn classes<'a>(&'a self) -> &'a [MyTerm<'static>] {
400        &self.classes[..]
401    }
402
403    fn extract_step(quads: &Vec<MyQuad<'static>>, items: &mut Vec<MyTerm<'static>>) -> bool {
404        let new_items: Vec<_> = quads
405            .quads_matching(
406                LocalMatcher { properties: &items },
407                [rdfs::subClassOf],
408                &items[..],
409                Any,
410            )
411            .flatten()
412            .map(|x| x.to_s().to_owned())
413            .collect();
414
415        let added = !new_items.is_empty();
416        items.extend(new_items);
417        added
418    }
419
420    fn extract(&mut self) {
421        loop {
422            if !OntologyExtractor::extract_step(&self.quads, &mut self.properties) {
423                break;
424            }
425        }
426
427        loop {
428            if !OntologyExtractor::extract_step(&self.quads, &mut self.classes) {
429                break;
430            }
431        }
432    }
433}
434
435// #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
436// pub enum EntryState {
437//     Ready,
438//     Transit,
439// }
440//
441// #[derive(Debug, Clone, Serialize, Deserialize)]
442// pub struct LovEntry {
443//     prefix: String,
444//     url: String,
445//     num: usize,
446//     state: EntryState,
447// }
448//
449// impl LovEntry {
450//     fn name(&self) -> String {
451//         format!("{}-{}.ttl", self.num, self.prefix)
452//     }
453//     pub fn url(&self, cache: &Cache) -> Option<Url> {
454//         self.file_url(cache).or_else(|| self.remote_url())
455//     }
456//
457//     #[cfg(not(target_arch = "wasm32"))]
458//     fn file_url(&self, cache: &Cache) -> Option<Url> {
459//         let p = cache.path()?;
460//         let url = p.join(self.name());
461//         Url::from_file_path(url).ok()
462//     }
463//
464//     #[cfg(target_arch = "wasm32")]
465//     fn file_url(&self, cache: &Cache) -> Option<Url> {
466//         None
467//     }
468//
469//     fn remote_url(&self) -> Option<Url> {
470//         Url::from_str(&self.url).ok()
471//     }
472//
473//     pub fn save(&mut self, cache: &Cache, content: &str) -> Option<()> {
474//         self.state = EntryState::Ready;
475//         cache.write_file(&self.name(), content)
476//     }
477// }
478//
479// #[derive(Debug, Clone, Serialize, Deserialize, Resource)]
480// pub struct LovHelper {
481//     entries: Vec<LovEntry>,
482// }
483//
484// impl LovHelper {
485//     fn try_from_cache(cache: &Cache) -> Option<Self> {
486//         let c = cache.get_file("index.json")?;
487//         info!("Found index file! {}", c);
488//         serde_json::from_str(&c).ok()
489//     }
490//
491//     pub fn from_cache(cache: &Cache) -> Self {
492//         Self::try_from_cache(cache).unwrap_or_else(|| Self {
493//             entries: Vec::new(),
494//         })
495//     }
496//
497//     pub fn save(mut self, cache: &Cache) -> Option<()> {
498//         self.entries = self
499//             .entries
500//             .into_iter()
501//             .filter(|x| x.state == EntryState::Ready)
502//             .collect();
503//         let st = serde_json::to_string(&self).ok()?;
504//         info!("Save index file! {}", st);
505//         cache.write_file("index.json", &st)
506//     }
507//
508//     pub fn has_entry_mut(&mut self, prefix: &Prefix) -> Option<&mut LovEntry> {
509//         self.entries
510//             .iter_mut()
511//             .find(|e| e.prefix == prefix.prefix && e.url == prefix.url.as_str())
512//     }
513//
514//     pub fn has_entry(&self, prefix: &Prefix) -> Option<&LovEntry> {
515//         self.entries
516//             .iter()
517//             .find(|e| e.prefix == prefix.prefix && e.url == prefix.url.as_str())
518//     }
519//
520//     pub fn create_entry(&mut self, prefix: &Prefix) -> &LovEntry {
521//         debug!("Create entry for {:?}", prefix);
522//         if let Some(e) = self.entries.iter().enumerate().find_map(|(i, e)| {
523//             (e.prefix == prefix.prefix && e.url == prefix.url.as_str()).then_some(i)
524//         }) {
525//             return &self.entries[e];
526//         }
527//         let c = self
528//             .entries
529//             .iter()
530//             .filter(|x| x.prefix == prefix.prefix)
531//             .count();
532//         let entry = LovEntry {
533//             prefix: prefix.prefix.to_string(),
534//             url: prefix.url.to_string(),
535//             num: c,
536//             state: EntryState::Transit,
537//         };
538//         self.entries.push(entry);
539//         self.entries.last().unwrap()
540//     }
541//
542//     pub fn save_prefix(&mut self, cache: &Cache, prefix: &Prefix, content: &str) -> Option<()> {
543//         let e = self
544//             .entries
545//             .iter_mut()
546//             .find(|e| (e.prefix == prefix.prefix && e.url == prefix.url.as_str()))?;
547//         e.save(cache, content)
548//     }
549// }
550
551#[derive(Debug, Clone, Component)]
552pub struct FromPrefix(pub Prefix);