From c427e8b2625d3ff515bede9379f0e4a21ef9065e Mon Sep 17 00:00:00 2001 From: axel Date: Thu, 23 Sep 2021 18:47:34 +0200 Subject: [PATCH] Named entities for datasets --- app/controllers/catalog_controller.rb | 32 +++++++------------ ...et_controller.js => dataset_controller.js} | 10 ++++-- app/javascript/packs/utils/search_api.js | 17 ++++++++++ app/models/article.rb | 22 +++++++++++++ app/models/dataset.rb | 25 +++++++++++++++ app/models/issue.rb | 22 +++++++++++++ app/views/dataset/show.html.erb | 6 ++-- config/routes.rb | 1 + 8 files changed, 109 insertions(+), 26 deletions(-) rename app/javascript/packs/controllers/{show_dataset_controller.js => dataset_controller.js} (64%) diff --git a/app/controllers/catalog_controller.rb b/app/controllers/catalog_controller.rb index 23cd805..47bcfba 100644 --- a/app/controllers/catalog_controller.rb +++ b/app/controllers/catalog_controller.rb @@ -53,29 +53,19 @@ class CatalogController < ApplicationController def named_entities_for_doc if params[:doc_id].index('_article_').nil? - nems = SolrSearcher.query({q:"issue_id_ssi:#{params[:doc_id]}", rows: 1000000})['response']['docs'] + issue = Issue.find(params[:doc_id]) + named_entities = issue.named_entities else - nems = SolrSearcher.query({q:"article_id_ssi:#{params[:doc_id]}", rows: 1000000})['response']['docs'] + article = Article.find(params[:doc_id]) + named_entities = article.named_entities end - output = {LOC: {}, PER: {}, ORG: {}, HumanProd: {}} - nems.select {|ne_solr| ne_solr['type_ssi'] == "LOC"}.each do |ne_solr| - output[:LOC][ne_solr['linked_entity_ssi']] = [] unless output[:LOC].has_key? ne_solr['linked_entity_ssi'] - output[:LOC][ne_solr['linked_entity_ssi']].append(ne_solr) - end - nems.select {|ne_solr| ne_solr['type_ssi'] == "PER"}.each do |ne_solr| - output[:PER][ne_solr['linked_entity_ssi']] = [] unless output[:PER].has_key? 
ne_solr['linked_entity_ssi'] - output[:PER][ne_solr['linked_entity_ssi']].append(ne_solr) - end - nems.select {|ne_solr| ne_solr['type_ssi'] == "ORG"}.each do |ne_solr| - output[:ORG][ne_solr['linked_entity_ssi']] = [] unless output[:ORG].has_key? ne_solr['linked_entity_ssi'] - output[:ORG][ne_solr['linked_entity_ssi']].append(ne_solr) - end - nems.select {|ne_solr| ne_solr['type_ssi'] == "HumanProd"}.each do |ne_solr| - output[:HumanProd][ne_solr['linked_entity_ssi']] = [] unless output[:HumanProd].has_key? ne_solr['linked_entity_ssi'] - output[:HumanProd][ne_solr['linked_entity_ssi']].append(ne_solr) - end - render partial: 'named_entities/named_entities', locals: {named_entities: output} - # render json: nems + render partial: 'named_entities/named_entities', locals: {named_entities: named_entities} + end + + def named_entities_for_dataset + dataset = Dataset.find(params[:dataset_id]) + named_entities = dataset.named_entities + render partial: 'named_entities/named_entities', locals: {named_entities: named_entities} end def paginate_facets diff --git a/app/javascript/packs/controllers/show_dataset_controller.js b/app/javascript/packs/controllers/dataset_controller.js similarity index 64% rename from app/javascript/packs/controllers/show_dataset_controller.js rename to app/javascript/packs/controllers/dataset_controller.js index 96ae972..29e9b96 100644 --- a/app/javascript/packs/controllers/show_dataset_controller.js +++ b/app/javascript/packs/controllers/dataset_controller.js @@ -1,18 +1,24 @@ import { Controller } from "stimulus" import {DatasetAPI} from "../utils/dataset_api" -import {Popover} from "bootstrap" +import {SearchAPI} from "../utils/search_api"; export default class extends Controller { static targets = [ ] static values = { id: Number, selected: Boolean } connect() { - console.log("this.idValue", this.idValue) this.loadDocuments(this.idValue, 1, 10, "default", "asc", "all") + this.load_named_entities() } loadDocuments(datasetId, page, per_page, 
sort, sort_order, type) { DatasetAPI.paginateDataset(datasetId, page, per_page, sort, sort_order, type, (data) => {}) } + load_named_entities() { + SearchAPI.load_dataset_named_entities(this.idValue, (data) => { + $('#named-entities-panel').find(".card-body").html(data) + }) + } + } \ No newline at end of file diff --git a/app/javascript/packs/utils/search_api.js b/app/javascript/packs/utils/search_api.js index 0b7ecb1..9d80c7d 100644 --- a/app/javascript/packs/utils/search_api.js +++ b/app/javascript/packs/utils/search_api.js @@ -1,5 +1,22 @@ export class SearchAPI { + static load_dataset_named_entities(dataset_id, callback) { + $.ajax({ + type: "POST", + url: "/dataset_named_entities", + data: {dataset_id: dataset_id}, + headers: { + 'X-CSRF-Token': $('meta[name="csrf-token"]').attr('content') + }, + success: (data, textStatus, jqXHR) => { + callback(data) + }, + error: (jqXHR, textStatus, errorThrown) => { + + } + }) + } + static load_named_entities(doc_id, callback) { $.ajax({ type: "POST", diff --git a/app/models/article.rb b/app/models/article.rb index e9455a9..7589b9a 100644 --- a/app/models/article.rb +++ b/app/models/article.rb @@ -32,4 +32,26 @@ class Article canvas_size = [canvas_coords[1]-canvas_coords[0], canvas_coords[3]-canvas_coords[2]] [min_x,min_y,canvas_size[0],canvas_size[1]] end + + def named_entities + nems = SolrSearcher.query({q:"article_id_ssi:#{self.id}", rows: 1000000})['response']['docs'] + output = {LOC: {}, PER: {}, ORG: {}, HumanProd: {}} + nems.select {|ne_solr| ne_solr['type_ssi'] == "LOC"}.each do |ne_solr| + output[:LOC][ne_solr['linked_entity_ssi']] = [] unless output[:LOC].has_key? ne_solr['linked_entity_ssi'] + output[:LOC][ne_solr['linked_entity_ssi']].append(ne_solr) + end + nems.select {|ne_solr| ne_solr['type_ssi'] == "PER"}.each do |ne_solr| + output[:PER][ne_solr['linked_entity_ssi']] = [] unless output[:PER].has_key? 
ne_solr['linked_entity_ssi'] + output[:PER][ne_solr['linked_entity_ssi']].append(ne_solr) + end + nems.select {|ne_solr| ne_solr['type_ssi'] == "ORG"}.each do |ne_solr| + output[:ORG][ne_solr['linked_entity_ssi']] = [] unless output[:ORG].has_key? ne_solr['linked_entity_ssi'] + output[:ORG][ne_solr['linked_entity_ssi']].append(ne_solr) + end + nems.select {|ne_solr| ne_solr['type_ssi'] == "HumanProd"}.each do |ne_solr| + output[:HumanProd][ne_solr['linked_entity_ssi']] = [] unless output[:HumanProd].has_key? ne_solr['linked_entity_ssi'] + output[:HumanProd][ne_solr['linked_entity_ssi']].append(ne_solr) + end + output + end end \ No newline at end of file diff --git a/app/models/dataset.rb b/app/models/dataset.rb index f179e64..6754ed6 100644 --- a/app/models/dataset.rb +++ b/app/models/dataset.rb @@ -49,4 +49,29 @@ class Dataset < ActiveRecord::Base return {docs: solr_docs.nil? ? [] : solr_docs, nb_pages: nb_pages} end + def named_entities + article_ids = self.documents.select {|d| d['type'] == 'article' }.map{|d| d['id']} + issue_ids = self.documents.select {|d| d['type'] == 'issue' }.map{|d| d['id']} + nems = [] + nems = SolrSearcher.query({q: "*:*", fq: "article_id_ssi:(#{article_ids.join(' OR ')})", rows: 1000000})['response']['docs'] unless article_ids.empty? + nems += SolrSearcher.query({q: "*:*", fq: "issue_id_ssi:(#{issue_ids.join(' OR ')})", rows: 1000000})['response']['docs'] unless issue_ids.empty? + output = {LOC: {}, PER: {}, ORG: {}, HumanProd: {}} + nems.select {|ne_solr| ne_solr['type_ssi'] == "LOC"}.each do |ne_solr| + output[:LOC][ne_solr['linked_entity_ssi']] = [] unless output[:LOC].has_key? ne_solr['linked_entity_ssi'] + output[:LOC][ne_solr['linked_entity_ssi']].append(ne_solr) + end + nems.select {|ne_solr| ne_solr['type_ssi'] == "PER"}.each do |ne_solr| + output[:PER][ne_solr['linked_entity_ssi']] = [] unless output[:PER].has_key? 
ne_solr['linked_entity_ssi'] + output[:PER][ne_solr['linked_entity_ssi']].append(ne_solr) + end + nems.select {|ne_solr| ne_solr['type_ssi'] == "ORG"}.each do |ne_solr| + output[:ORG][ne_solr['linked_entity_ssi']] = [] unless output[:ORG].has_key? ne_solr['linked_entity_ssi'] + output[:ORG][ne_solr['linked_entity_ssi']].append(ne_solr) + end + nems.select {|ne_solr| ne_solr['type_ssi'] == "HumanProd"}.each do |ne_solr| + output[:HumanProd][ne_solr['linked_entity_ssi']] = [] unless output[:HumanProd].has_key? ne_solr['linked_entity_ssi'] + output[:HumanProd][ne_solr['linked_entity_ssi']].append(ne_solr) + end + output + end end diff --git a/app/models/issue.rb b/app/models/issue.rb index b347c53..ff0dd06 100644 --- a/app/models/issue.rb +++ b/app/models/issue.rb @@ -33,4 +33,26 @@ class Issue end i end + + def named_entities + nems = SolrSearcher.query({q:"issue_id_ssi:#{self.id}", rows: 1000000})['response']['docs'] + output = {LOC: {}, PER: {}, ORG: {}, HumanProd: {}} + nems.select {|ne_solr| ne_solr['type_ssi'] == "LOC"}.each do |ne_solr| + output[:LOC][ne_solr['linked_entity_ssi']] = [] unless output[:LOC].has_key? ne_solr['linked_entity_ssi'] + output[:LOC][ne_solr['linked_entity_ssi']].append(ne_solr) + end + nems.select {|ne_solr| ne_solr['type_ssi'] == "PER"}.each do |ne_solr| + output[:PER][ne_solr['linked_entity_ssi']] = [] unless output[:PER].has_key? ne_solr['linked_entity_ssi'] + output[:PER][ne_solr['linked_entity_ssi']].append(ne_solr) + end + nems.select {|ne_solr| ne_solr['type_ssi'] == "ORG"}.each do |ne_solr| + output[:ORG][ne_solr['linked_entity_ssi']] = [] unless output[:ORG].has_key? ne_solr['linked_entity_ssi'] + output[:ORG][ne_solr['linked_entity_ssi']].append(ne_solr) + end + nems.select {|ne_solr| ne_solr['type_ssi'] == "HumanProd"}.each do |ne_solr| + output[:HumanProd][ne_solr['linked_entity_ssi']] = [] unless output[:HumanProd].has_key? 
ne_solr['linked_entity_ssi'] + output[:HumanProd][ne_solr['linked_entity_ssi']].append(ne_solr) + end + output + end end \ No newline at end of file diff --git a/app/views/dataset/show.html.erb b/app/views/dataset/show.html.erb index 10094c7..c9545df 100644 --- a/app/views/dataset/show.html.erb +++ b/app/views/dataset/show.html.erb @@ -1,11 +1,11 @@ <% if current_user.id == @dataset.user_id %> <% set_page_title @dataset.title %>
+ data-controller="dataset" + data-dataset-id-value="<%= @dataset.id %>">
<%= render partial: 'dataset_header' %> diff --git a/config/routes.rb b/config/routes.rb index 3dd00ba..4fbbe6e 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -9,6 +9,7 @@ Rails.application.routes.draw do get '/catalog/:id', to: 'catalog#show' post '/paginate_facets', to: 'catalog#paginate_facets' post '/named_entities', to: 'catalog#named_entities_for_doc' + post '/dataset_named_entities', to: 'catalog#named_entities_for_dataset' get '/datasets', to: 'dataset#index' get '/datasets/update', to: 'dataset#update_datasets_list' -- GitLab