From 7f2b871637db7e8650fbffe4377c065d917c0793 Mon Sep 17 00:00:00 2001
From: Simon Mayer
Date: Mon, 29 Aug 2022 15:04:58 +0200
Subject: [PATCH] Format code to conform to Ruby standards

---
 app/channels/application_cable/connection.rb | 29 +-
 app/controllers/application_controller.rb | 13 +-
 app/controllers/catalog_controller.rb | 392 +++++++++---------
 app/controllers/dataset_controller.rb | 387 ++++++++---------
 app/controllers/experiment_controller.rb | 197 +++++----
 app/controllers/notification_controller.rb | 4 +-
 app/controllers/tool_controller.rb | 31 +-
 app/helpers/application_helper.rb | 8 +-
 app/helpers/experiment_helper.rb | 28 +-
 app/helpers/named_entities_helper.rb | 64 ++-
 app/helpers/search_helper.rb | 56 ++-
 app/mailers/application_mailer.rb | 4 +-
 app/models/article.rb | 177 ++++----
 app/models/compound_article.rb | 8 +-
 app/models/concerns/abstract_searcher.rb | 17 +-
 app/models/current.rb | 2 +-
 app/models/dataset.rb | 197 +++++----
 app/models/experiment.rb | 168 ++++----
 app/models/issue.rb | 181 ++++----
 app/models/notification.rb | 7 +-
 app/models/page.rb | 55 ++-
 app/models/solr_query.rb | 94 ++---
 app/models/solr_searcher.rb | 39 +-
 app/models/tool.rb | 26 +-
 app/models/user.rb | 42 +-
 app/workers/export_dataset_worker.rb | 158 +++----
 app/workers/ngrams_worker.rb | 105 ++---
 app/workers/preprocess_worker.rb | 101 ++---
 app/workers/search_to_dataset_worker.rb | 62 +--
 app/workers/source_dataset_worker.rb | 103 ++---
 app/workers/splitter_worker.rb | 8 +-
 config/application.rb | 43 +-
 config/boot.rb | 2 +-
 config/environments/production.rb | 10 +-
 config/environments/test.rb | 4 +-
 config/initializers/assets.rb | 4 +-
 .../initializers/filter_parameter_logging.rb | 2 +-
 config/initializers/session_store.rb | 4 +-
 config/routes.rb | 98 ++---
 db/migrate/20210721081005_create_users.rb | 2 +-
 .../20210903194218_create_experiment.rb | 16 +-
 db/migrate/20210909142841_create_datasets.rb | 18 +-
 db/migrate/20210915140752_add_tool.rb | 22 +-
 .../20211110092535_add_notifications_table.rb | 12 +-
 ...20211123112405_create_compound_articles.rb | 28 +-
 db/schema.rb | 2 +-
 test/test_helper.rb | 2 +-
 test/workers/tool_runner_worker_test.rb | 3 +-
 48 files changed, 1513 insertions(+), 1522 deletions(-)

diff --git a/app/channels/application_cable/connection.rb b/app/channels/application_cable/connection.rb
index af25058..a427edb 100644
--- a/app/channels/application_cable/connection.rb
+++ b/app/channels/application_cable/connection.rb
@@ -1,21 +1,21 @@
 module ApplicationCable
 ##
 # This class identifies the current user in a websocket communication using ApplicationCable
 class Connection < ActionCable::Connection::Base
- identified_by :current_user
+ identified_by :current_user
- def connect
- self.current_user = find_user
- end
+ def connect
+ self.current_user = find_user
+ end
- def find_user
- user_id = cookies.signed["user.id"]
- current_user = User.find_by(id: user_id)
- if current_user
- current_user
- else
- reject_unauthorized_connection
- end
+ def find_user
+ user_id = cookies.signed["user.id"]
+ current_user = User.find_by(id: user_id)
+ if current_user
+ current_user
+ else
+ reject_unauthorized_connection
 end
+ end
 end
 end
diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb
index 81692ba..3863792 100644
--- a/app/controllers/application_controller.rb
+++ b/app/controllers/application_controller.rb
@@ -1,10 +1,9 @@
 class ApplicationController < ActionController::Base
- include Authentication
-
- def send_file
- File.open("tmp/#{params[:filename]}", 'r') do |f|
- send_data f.read, type: "text/json", filename: params[:filename]
- end
- end
+ include Authentication
+ def send_file
+ File.open("tmp/#{params[:filename]}", "r") do |f|
+ send_data f.read, type: "text/json", filename: params[:filename]
+ end
+ end
 end
diff --git a/app/controllers/catalog_controller.rb b/app/controllers/catalog_controller.rb
index 2b7a4aa..ecd7a53 100644
--- a/app/controllers/catalog_controller.rb
+++ b/app/controllers/catalog_controller.rb
@@ -1,220 +1,219 @@
 class CatalogController < ApplicationController
+ before_action :authenticate_user!, :strip_input_fields
- before_action :authenticate_user!, :strip_input_fields
+ def home
+ end
- def home
-
- end
-
- ##
- # Creates a search query and submit it to the index. Retrieve and displays results + metadata.
- def index
- if params[:q]
- @search_type = params[:search_type].nil? ? "exact" : params[:search_type]
- @solr_params = SolrQuery.new(@search_type).to_params
- @solr_params[:q] = params[:q]
- @solr_params[:rows] = params[:per_page] if params[:per_page]
- @current_page = params[:page].to_i != 0 ? params[:page].to_i : 1
- @solr_params[:start] = params[:page].to_i != 0 ? @solr_params[:rows] * (params[:page].to_i-1) : 0
- @solr_params[:sort] = params[:sort] if params[:sort]
- if params[:f]
- params[:f].each do |k,v|
- if k == "date_created_dtsi" # v is a hash {to: "", from: ""}
- @solr_params[:fq] << "#{k}:[#{v['from']}T00:00:00Z TO #{v['to']}T00:00:00Z]"
- else
- if v.is_a? Array
- v.each do |val|
- @solr_params[:fq] << "#{k}:#{val}"
- end
- end
- end
- end
+ ##
+ # Creates a search query and submits it to the index. Retrieves and displays results and metadata.
+ def index
+ if params[:q]
+ @search_type = params[:search_type].nil? ? "exact" : params[:search_type]
+ @solr_params = SolrQuery.new(@search_type).to_params
+ @solr_params[:q] = params[:q]
+ @solr_params[:rows] = params[:per_page] if params[:per_page]
+ @current_page = params[:page].to_i != 0 ? params[:page].to_i : 1
+ @solr_params[:start] = params[:page].to_i != 0 ? @solr_params[:rows] * (params[:page].to_i - 1) : 0
+ @solr_params[:sort] = params[:sort] if params[:sort]
+ if params[:f]
+ params[:f].each do |k, v|
+ if k == "date_created_dtsi" # v is a hash {to: "", from: ""}
+ @solr_params[:fq] << "#{k}:[#{v["from"]}T00:00:00Z TO #{v["to"]}T00:00:00Z]"
+ else
+ if v.is_a? Array
+ v.each do |val|
+ @solr_params[:fq] << "#{k}:#{val}"
+ end
 end
- session['search_params'] = @solr_params
- session['query_params'] = params.to_unsafe_h.slice('q', 'page', 'per_page','sort', 'f')
- @results = SolrSearcher.query @solr_params
- puts @results.to_json if Rails.env == "development"
- @resulting_docs = @results['response']['docs'].map do |solr_doc|
- case solr_doc['has_model_ssim']
- when ['Article']
- Article.from_solr_doc solr_doc
- when ['Issue']
- Issue.from_solr_doc solr_doc
- end
- end
- entities_fields = I18n.t("newspapers.solr_fields").values_at(:persons, :locations, :organisations, :human_productions)
- @entities_labels = []
- entities_fields.each do |entity_field|
- (@entities_labels << @results['facets'][entity_field]['buckets'].map{|ne| ne['val']}).flatten! if @results['facets'][entity_field]
- end
- @entities_labels = helpers.get_entity_label @entities_labels
 end
+ end
+ end
+ end
+ session["search_params"] = @solr_params
+ session["query_params"] = params.to_unsafe_h.slice("q", "page", "per_page", "sort", "f")
+ @results = SolrSearcher.query @solr_params
+ puts @results.to_json if Rails.env == "development"
+ @resulting_docs = @results["response"]["docs"].map do |solr_doc|
+ case solr_doc["has_model_ssim"]
+ when ["Article"]
+ Article.from_solr_doc solr_doc
+ when ["Issue"]
+ Issue.from_solr_doc solr_doc
 end
+ end
+ entities_fields = I18n.t("newspapers.solr_fields").values_at(:persons, :locations, :organisations, :human_productions)
+ @entities_labels = []
+ entities_fields.each do |entity_field|
+ (@entities_labels << @results["facets"][entity_field]["buckets"].map { |ne| ne["val"] }).flatten! if @results["facets"][entity_field]
+ end
+ @entities_labels = helpers.get_entity_label @entities_labels
 end
+ end
- ##
- # Display an issue
- def show
- @issue = Issue.from_solr params[:id], with_pages=true, with_articles=true
- session['named_entities'] = Issue.named_entities @issue.id
- session['named_entities_labels'] = helpers.get_linked_entities session['named_entities'].map{ |k,v| v.keys }.flatten.uniq
- end
+ ##
+ # Display an issue
+ def show
+ @issue = Issue.from_solr params[:id], with_pages = true, with_articles = true
+ session["named_entities"] = Issue.named_entities @issue.id
+ session["named_entities_labels"] = helpers.get_linked_entities session["named_entities"].map { |k, v| v.keys }.flatten.uniq
+ end
- ##
- # Retrieve named entities for a list of documents (issue and/or articles)
- def named_entities_for_docs
- named_entities = {LOC: {}, PER: {}, ORG: {}, HumanProd: {}}
- params[:docs_ids].each do |doc_id|
- if doc_id.index('_article_').nil?
- doc_named_entities = session['named_entities']
- else # if article, filter stored list
- doc_named_entities = session['named_entities'].map{ |ne_type, ne_list|
- [ne_type,ne_list.select{ |linked_id, namedentities|
- namedentities.any?{ |ne|
- ne['article_id_ssi'] == doc_id
- }
- }.map{ |k,v| [k,v.select{ |ne| ne['article_id_ssi'] == doc_id }] }.to_h]
- }.to_h
- end
- named_entities[:LOC] = named_entities[:LOC].merge(doc_named_entities[:LOC]) do |key,oldval,newval|
- oldval.concat newval
- end
- named_entities[:ORG] = named_entities[:ORG].merge(doc_named_entities[:ORG]) do |key,oldval,newval|
- oldval.concat newval
- end
- named_entities[:PER] = named_entities[:PER].merge(doc_named_entities[:PER]) do |key,oldval,newval|
- oldval.concat newval
- end
- named_entities[:HumanProd] = named_entities[:HumanProd].merge(doc_named_entities[:HumanProd]) do |key,oldval,newval|
- oldval.concat newval
- end
- end
- render partial: 'named_entities/named_entities', locals: {named_entities: named_entities, linked_entities: session['named_entities_labels']}
+ ##
+ # Retrieve named entities for a list of documents (issue and/or articles)
+ def named_entities_for_docs
+ named_entities = { LOC: {}, PER: {}, ORG: {}, HumanProd: {} }
+ params[:docs_ids].each do |doc_id|
+ if doc_id.index("_article_").nil?
+ doc_named_entities = session["named_entities"]
+ else # if article, filter stored list
+ doc_named_entities = session["named_entities"].map { |ne_type, ne_list|
+ [ne_type, ne_list.select { |linked_id, namedentities|
+ namedentities.any?
{ |ne| + ne["article_id_ssi"] == doc_id + } + }.map { |k, v| [k, v.select { |ne| ne["article_id_ssi"] == doc_id }] }.to_h] + }.to_h + end + named_entities[:LOC] = named_entities[:LOC].merge(doc_named_entities[:LOC]) do |key, oldval, newval| + oldval.concat newval + end + named_entities[:ORG] = named_entities[:ORG].merge(doc_named_entities[:ORG]) do |key, oldval, newval| + oldval.concat newval + end + named_entities[:PER] = named_entities[:PER].merge(doc_named_entities[:PER]) do |key, oldval, newval| + oldval.concat newval + end + named_entities[:HumanProd] = named_entities[:HumanProd].merge(doc_named_entities[:HumanProd]) do |key, oldval, newval| + oldval.concat newval + end end + render partial: "named_entities/named_entities", locals: { named_entities: named_entities, linked_entities: session["named_entities_labels"] } + end - ## - # Retrieve named entities for a dataset - def named_entities_for_dataset - dataset = Dataset.find(params[:dataset_id]) - named_entities = dataset.named_entities - named_entities_labels = helpers.get_linked_entities named_entities.map{ |k,v| v.keys }.flatten.uniq - render partial: 'named_entities/named_entities', locals: {named_entities: named_entities, linked_entities: named_entities_labels} - end + ## + # Retrieve named entities for a dataset + def named_entities_for_dataset + dataset = Dataset.find(params[:dataset_id]) + named_entities = dataset.named_entities + named_entities_labels = helpers.get_linked_entities named_entities.map { |k, v| v.keys }.flatten.uniq + render partial: "named_entities/named_entities", locals: { named_entities: named_entities, linked_entities: named_entities_labels } + end - ## - # Retrieve and display paginated facets - def paginate_facets - out = {} - if params[:field_name] != "" - search_params = session['search_params'] - search_params['rows'] = 0 - search_params['json.facet'] = {"#{params[:field_name]}": {terms: { - field: params[:field_name], - limit: 15, - numBuckets: true, - offset: (params[:current_page].to_i-1) * 15}}}.to_json - res = SolrSearcher.query search_params - entities_labels = [res['facets'][params[:field_name]]['buckets'].map{|ne| ne['val']}] - entities_labels = helpers.get_entity_label entities_labels - facet_constraints = search_params['fq'].select { |fq| fq.split(':')[0] == params[:field_name] }.map{|fq| {label: params[:field_name], value: fq.split(':')[1]} } - out[:facets_entries] = [] - res['facets'][params[:field_name]]['buckets'].each do |facet_entry| - out[:facets_entries] << render_to_string(layout: false, partial: "facet_entry", locals: { - entities_labels: entities_labels, - facet_constraints: facet_constraints, - field: params[:field_name], - facet: facet_entry, - index: params[:current_page].to_i, - per_page: 15 - }) - end - end - out[:pagination] = render_to_string(layout: false, partial: 'facet_pagination', locals: {nb_pages: params[:nb_pages].to_i, current_page: params[:current_page].to_i}) - render json: out + ## + # Retrieve and display paginated facets + def paginate_facets + out = {} + if params[:field_name] != "" + search_params = session["search_params"] + search_params["rows"] = 0 + search_params["json.facet"] = { "#{params[:field_name]}": { terms: { + field: params[:field_name], + limit: 15, + numBuckets: true, + offset: (params[:current_page].to_i - 1) * 15, + } } }.to_json + res = SolrSearcher.query search_params + entities_labels = [res["facets"][params[:field_name]]["buckets"].map { |ne| ne["val"] }] + entities_labels = helpers.get_entity_label entities_labels + facet_constraints = 
search_params["fq"].select { |fq| fq.split(":")[0] == params[:field_name] }.map { |fq| { label: params[:field_name], value: fq.split(":")[1] } } + out[:facets_entries] = [] + res["facets"][params[:field_name]]["buckets"].each do |facet_entry| + out[:facets_entries] << render_to_string(layout: false, partial: "facet_entry", locals: { + entities_labels: entities_labels, + facet_constraints: facet_constraints, + field: params[:field_name], + facet: facet_entry, + index: params[:current_page].to_i, + per_page: 15, + }) + end end + out[:pagination] = render_to_string(layout: false, partial: "facet_pagination", locals: { nb_pages: params[:nb_pages].to_i, current_page: params[:current_page].to_i }) + render json: out + end - ## - # Open modal for date frequencies histogram in wide format - def wide_dates_histogram - out = {} - out[:modal_content] = render_to_string(layout: false, partial: "wide_dates_histogram") - render json: out - end + ## + # Open modal for date frequencies histogram in wide format + def wide_dates_histogram + out = {} + out[:modal_content] = render_to_string(layout: false, partial: "wide_dates_histogram") + render json: out + end - ## - # Open Modal to confirm the creation of a compound article - def confirm_compound_creation - out = {} - out[:modal_content] = render_to_string(layout: false, partial: "confirm_compound_creation", locals: {article_parts: params[:article_parts]}) - render json: out - end + ## + # Open Modal to confirm the creation of a compound article + def confirm_compound_creation + out = {} + out[:modal_content] = render_to_string(layout: false, partial: "confirm_compound_creation", locals: { article_parts: params[:article_parts] }) + render json: out + end - ## - # Create a new compound article - def create_compound - compound = CompoundArticle.new - compound.user = current_user - compound.title = params[:title] - compound.issue_id = params[:issue_id] - issue = Issue.from_solr params[:issue_id] - compound.newspaper = issue.newspaper - compound.date_created = issue.date_created - compound.thumbnail_url = issue.thumbnail_url - compound.language = issue.language - compound.all_text = params[:all_text] - compound.parts = params[:article_parts_ids] - begin - compound.save! - render json: {status: 'ok', html: render_to_string(layout: false, partial: "compound_articles_panel", locals: {issue_id: params[:issue_id]})} - rescue ActiveRecord::RecordNotUnique - render json: {status: "error", message: "A compound article with this title already exists."} - rescue ActiveRecord::RecordInvalid - render json: {status: "error", message: "The title should not be blank."} - end + ## + # Create a new compound article + def create_compound + compound = CompoundArticle.new + compound.user = current_user + compound.title = params[:title] + compound.issue_id = params[:issue_id] + issue = Issue.from_solr params[:issue_id] + compound.newspaper = issue.newspaper + compound.date_created = issue.date_created + compound.thumbnail_url = issue.thumbnail_url + compound.language = issue.language + compound.all_text = params[:all_text] + compound.parts = params[:article_parts_ids] + begin + compound.save! + render json: { status: "ok", html: render_to_string(layout: false, partial: "compound_articles_panel", locals: { issue_id: params[:issue_id] }) } + rescue ActiveRecord::RecordNotUnique + render json: { status: "error", message: "A compound article with this title already exists." 
} + rescue ActiveRecord::RecordInvalid + render json: { status: "error", message: "The title should not be blank." } end + end - ## - # Delete an existing compound - def delete_compound - compound = CompoundArticle.find(params[:compound_id]) - issue_id = compound.issue_id - current_user.datasets.each do |dataset| - if dataset.documents.any?{|doc| doc['id'].to_s == compound.id.to_s} - dataset.documents = dataset.documents.select{|doc| doc['id'].to_s != compound.id.to_s} - dataset.save! - end - end - compound.destroy - out = {} - out[:html] = render_to_string(layout: false, partial: "compound_articles_panel", locals: {issue_id: issue_id}) - out[:datasets] = render_to_string(layout: false, partial: "manage_datasets_content_show_page") - render json: out + ## + # Delete an existing compound + def delete_compound + compound = CompoundArticle.find(params[:compound_id]) + issue_id = compound.issue_id + current_user.datasets.each do |dataset| + if dataset.documents.any? { |doc| doc["id"].to_s == compound.id.to_s } + dataset.documents = dataset.documents.select { |doc| doc["id"].to_s != compound.id.to_s } + dataset.save! + end end + compound.destroy + out = {} + out[:html] = render_to_string(layout: false, partial: "compound_articles_panel", locals: { issue_id: issue_id }) + out[:datasets] = render_to_string(layout: false, partial: "manage_datasets_content_show_page") + render json: out + end - ## - # Retrieve and display a random sample of the result of a search - def random_sample - search_params = session['search_params'].with_indifferent_access - search_params[:fq] = search_params[:fq].select {|elt| !elt.start_with? "has_model_ssim:" } if search_params[:fq] - search_params[:fq] ||= [] - search_params[:fq] << "has_model_ssim:Article" - search_params[:sort] = "rand#{(0...8).map { (65 + rand(26)).chr }.join} asc" - results = SolrSearcher.query search_params - results = results['response']['docs'].map do |solr_doc| - case solr_doc['has_model_ssim'] - when ['Article'] - Article.from_solr_doc solr_doc - when ['Issue'] - Issue.from_solr_doc solr_doc - end - end - render json: {content: render_to_string(layout: false, partial: "random_sample", locals: {resulting_docs: results}) } + ## + # Retrieve and display a random sample of the result of a search + def random_sample + search_params = session["search_params"].with_indifferent_access + search_params[:fq] = search_params[:fq].select { |elt| !elt.start_with? 
"has_model_ssim:" } if search_params[:fq] + search_params[:fq] ||= [] + search_params[:fq] << "has_model_ssim:Article" + search_params[:sort] = "rand#{(0...8).map { (65 + rand(26)).chr }.join} asc" + results = SolrSearcher.query search_params + results = results["response"]["docs"].map do |solr_doc| + case solr_doc["has_model_ssim"] + when ["Article"] + Article.from_solr_doc solr_doc + when ["Issue"] + Issue.from_solr_doc solr_doc + end end + render json: { content: render_to_string(layout: false, partial: "random_sample", locals: { resulting_docs: results }) } + end - private + private - def strip_input_fields - params.each do |key, value| - params[key] = value.strip if value.respond_to?("strip") - end + def strip_input_fields + params.each do |key, value| + params[key] = value.strip if value.respond_to?("strip") end + end end diff --git a/app/controllers/dataset_controller.rb b/app/controllers/dataset_controller.rb index e04f4b4..ca0fbbc 100644 --- a/app/controllers/dataset_controller.rb +++ b/app/controllers/dataset_controller.rb @@ -1,194 +1,203 @@ class DatasetController < ApplicationController - - before_action :authenticate_user! - - ## - # List all datasets - def index - end - - ## - # Display a single dataset - def show - @dataset = Dataset.find(params[:id]) - @current_page = params[:page] || 1 - @per_page = params[:per_page] || 10 - session[:working_dataset] = @dataset.id - end - - ## - # Create a new empty dataset - def create_dataset - dataset = Dataset.new - dataset.user = current_user - dataset.title = params[:title] - begin - dataset.save! - render json: {status: 'ok'} - rescue ActiveRecord::RecordNotUnique - render json: {status: "error", message: "A dataset with this title already exists."} - rescue ActiveRecord::RecordInvalid - render json: {status: "error", message: "The title should not be blank."} - end - end - - ## - # Rename an existing dataset - def rename_dataset - dataset = Dataset.find(params[:id]) - dataset.title = params[:title] + before_action :authenticate_user! + + ## + # List all datasets + def index + puts "Listing datasets" + puts session.inspect + end + + ## + # Display a single dataset + def show + puts "Finding dataset with id " + String(params[:id]) + @dataset = Dataset.find(params[:id]) + @current_page = params[:page] || 1 + @per_page = params[:per_page] || 10 + session[:working_dataset] = @dataset.id + puts "The session now has working_dataset " + String(session[:working_dataset]) + end + + ## + # Create a new empty dataset + def create_dataset + dataset = Dataset.new + dataset.user = current_user + dataset.title = params[:title] + begin + dataset.save! + render json: { status: "ok" } + rescue ActiveRecord::RecordNotUnique + render json: { status: "error", message: "A dataset with this title already exists." } + rescue ActiveRecord::RecordInvalid + render json: { status: "error", message: "The title should not be blank." } + end + end + + ## + # Rename an existing dataset + def rename_dataset + dataset = Dataset.find(params[:id]) + dataset.title = params[:title] + begin + dataset.save! + render json: { status: "ok" } + rescue ActiveRecord::RecordNotUnique + render json: { status: "error", message: "A dataset with this title already exists." } + rescue ActiveRecord::RecordInvalid + render json: { status: "error", message: "The title should not be blank." } + end + end + + ## + # Import a public dataset + def import_dataset + to_copy = Dataset.find params[:original_dataset_id] + render json: { status: "error", message: "This dataset is not public." 
} unless to_copy.public? + new_dataset = Dataset.new + new_dataset.user_id = current_user.id + new_dataset.title = params[:title] + to_copy.documents.each do |doc| + if doc["type"] == "compound" + ca = CompoundArticle.find(doc["id"]).dup + ca.user = current_user begin - dataset.save! - render json: {status: 'ok'} + ca.save! rescue ActiveRecord::RecordNotUnique - render json: {status: "error", message: "A dataset with this title already exists."} - rescue ActiveRecord::RecordInvalid - render json: {status: "error", message: "The title should not be blank."} + ca.title = "_#{(0...8).map { (65 + rand(26)).chr }.join}_#{ca.title}" + ca.save! end - end - - ## - # Import a public dataset - def import_dataset - to_copy = Dataset.find params[:original_dataset_id] - render json: {status: "error", message: "This dataset is not public."} unless to_copy.public? - new_dataset = Dataset.new - new_dataset.user_id = current_user.id - new_dataset.title = params[:title] - to_copy.documents.each do |doc| - if doc['type'] == "compound" - ca = CompoundArticle.find(doc['id']).dup - ca.user = current_user - begin - ca.save! - rescue ActiveRecord::RecordNotUnique - ca.title = "_#{(0...8).map { (65 + rand(26)).chr }.join}_#{ca.title}" - ca.save! - end - new_dataset.documents << {id: ca.id, type: "compound"} - else - new_dataset.documents << doc - end - end - begin - new_dataset.save! - render json: {status: 'ok'} - rescue ActiveRecord::RecordNotUnique - render json: {status: "error", message: "A dataset with this title already exists."} - rescue ActiveRecord::RecordInvalid - render json: {status: "error", message: "The title should not be blank."} - end - end - - ## - # Delete an existing dataset - def delete_dataset - dataset = Dataset.find(params[:dataset_id]) - dataset_id = dataset.id - dataset.destroy - if session[:working_dataset] == dataset_id - if current_user.datasets.first - session[:working_dataset] = current_user.datasets.first.id - else - session[:working_dataset] = nil - end - end - end - - ## - # Update the view of the list of datasets - def update_datasets_list - respond_to do |format| - format.js - end - end - - ## - # - def set_working_dataset - session[:working_dataset] = params[:dataset_id] - @title = Dataset.find(session[:working_dataset]).title - respond_to do |format| - format.js - end - end - - def add_selected_documents - out = {} - @nb_added_docs = params[:documents_ids].size - dataset = Dataset.find(session[:working_dataset]) - existing = dataset.add_documents params[:documents_ids] # Add docs and return existing ids - @nb_added_docs -= existing.size - title = dataset.title - message = "

#{@nb_added_docs} document#{@nb_added_docs > 1 ? "s were" : " was"} added to your dataset.

" - message.concat "

#{existing.size} document#{existing.size > 1 ? "s" : ""} already exist in this dataset.

" unless existing.empty? - # render partial: "shared/notification", locals: {notif_title: title, notif_content: message.html_safe} - out['notif'] = render_to_string layout: false, partial: "shared/notification", locals: {notif_title: title, notif_content: message.html_safe} - out['nbissues'] = dataset.documents.select{|d| d['type'] == "issue" }.size - out['nbarticles'] = dataset.documents.select{|d| d['type'] == "article" }.size - out['nbdocs'] = out['nbissues'] + out['nbarticles'] - out['title'] = title - out['results_datasets'] = params[:documents_ids].map{ |docid| [docid, render_to_string(layout: false, partial: 'catalog/result_datasets', locals: {doc_id: docid})] }.to_h - render json: out - end - - def add_compound - out = {} - dataset = Dataset.find(session[:working_dataset]) - existing = dataset.add_compound params[:compound_id] # Add docs and return existing ids - title = dataset.title - message = "

The compound article was added to your dataset.

" - out['notif'] = render_to_string layout: false, partial: "shared/notification", locals: {notif_title: title, notif_content: message.html_safe} - out['nbissues'] = dataset.documents.select{|d| d['type'] == "issue" }.size - out['nbarticles'] = dataset.documents.select{|d| d['type'] == "article" }.size - out['nbcompounds'] = dataset.documents.select{|d| d['type'] == "compound" }.size - out['nbdocs'] = out['nbissues'] + out['nbarticles'] + out['nbcompounds'] - out['title'] = title - render json: out - end - - def remove_selected_documents - @nb_removed_docs = params[:documents_ids].size - dataset = Dataset.find(session[:working_dataset]) - dataset.remove_documents params[:documents_ids] - redirect_to action: "show", id: dataset.id - end - - def add_all_documents - SearchToDatasetWorker.perform_async(current_user.id, session[:working_dataset], params[:search_params].to_unsafe_h) - title = Dataset.find(session[:working_dataset]).title - message = "

Documents are being added to your dataset. You will be notified when the operation is done.

" - render partial: "shared/notification", locals: {notif_title: title, notif_content: message.html_safe} - end - - def export_dataset - ExportDatasetWorker.perform_async(current_user.id, params[:dataset_id], params[:export_type]) - title = Dataset.find(params[:dataset_id]).title - message = "

The export is being prepared. You will be notified when the operation is done.

" - render partial: "shared/notification", locals: {notif_title: title, notif_content: message.html_safe} - end - - def toggle_sharing_status - @dataset = Dataset.find(params[:dataset_id]) - @dataset.toggle!(:public) - render partial: 'dataset_info' - end - - def paginate - out = {} - d = Dataset.find params['id'] - rows = params[:per_page].to_i - res = d.fetch_paginated_documents(params[:page].to_i, rows, params[:sort], params[:sort_order], params[:type]) - out[:documents] = render_to_string(layout: false, - partial: "documents", - locals: {docs: res[:docs], rows: rows, pagenum: params[:page].to_i}) - out[:pagination] = render_to_string(layout: false, - partial: "pagination", - locals: {nb_pages: res[:nb_pages].to_i, current_page: params[:page].to_i}) - render json: out - end - - def list_datasets - render json: current_user.datasets.to_json - end + new_dataset.documents << { id: ca.id, type: "compound" } + else + new_dataset.documents << doc + end + end + begin + new_dataset.save! + render json: { status: "ok" } + rescue ActiveRecord::RecordNotUnique + render json: { status: "error", message: "A dataset with this title already exists." } + rescue ActiveRecord::RecordInvalid + render json: { status: "error", message: "The title should not be blank." } + end + end + + ## + # Delete an existing dataset + def delete_dataset + dataset = Dataset.find(params[:dataset_id]) + dataset_id = dataset.id + dataset.destroy + if session[:working_dataset] == dataset_id + if current_user.datasets.first + session[:working_dataset] = current_user.datasets.first.id + else + session[:working_dataset] = nil + end + end + end + + ## + # Update the view of the list of datasets + def update_datasets_list + respond_to do |format| + format.js + end + end + + ## + # + def set_working_dataset + session[:working_dataset] = params[:dataset_id] + @title = Dataset.find(session[:working_dataset]).title + puts "Setting working dataset to " + String(session[:working_dataset]) + puts session.inspect + respond_to do |format| + format.js + end + end + + def add_selected_documents + puts session.inspect + puts "User id from session is " + String(session[:current_user_id]) + " and working dataset is " + String(session[:working_dataset]) + out = {} + @nb_added_docs = params[:documents_ids].size + dataset = Dataset.find(session[:working_dataset]) + existing = dataset.add_documents params[:documents_ids] # Add docs and return existing ids + @nb_added_docs -= existing.size + title = dataset.title + message = "

#{@nb_added_docs} document#{@nb_added_docs > 1 ? "s were" : " was"} added to your dataset.

" + message.concat "

#{existing.size} document#{existing.size > 1 ? "s" : ""} already exist in this dataset.

" unless existing.empty? + # render partial: "shared/notification", locals: {notif_title: title, notif_content: message.html_safe} + out["notif"] = render_to_string layout: false, partial: "shared/notification", locals: { notif_title: title, notif_content: message.html_safe } + out["nbissues"] = dataset.documents.select { |d| d["type"] == "issue" }.size + out["nbarticles"] = dataset.documents.select { |d| d["type"] == "article" }.size + out["nbdocs"] = out["nbissues"] + out["nbarticles"] + out["title"] = title + out["results_datasets"] = params[:documents_ids].map { |docid| [docid, render_to_string(layout: false, partial: "catalog/result_datasets", locals: { doc_id: docid })] }.to_h + render json: out + end + + def add_compound + out = {} + dataset = Dataset.find(session[:working_dataset]) + # dataset = Dataset.find(params[:working_dataset]) + existing = dataset.add_compound params[:compound_id] # Add docs and return existing ids + title = dataset.title + message = "

The compound article was added to your dataset.

" + out["notif"] = render_to_string layout: false, partial: "shared/notification", locals: { notif_title: title, notif_content: message.html_safe } + out["nbissues"] = dataset.documents.select { |d| d["type"] == "issue" }.size + out["nbarticles"] = dataset.documents.select { |d| d["type"] == "article" }.size + out["nbcompounds"] = dataset.documents.select { |d| d["type"] == "compound" }.size + out["nbdocs"] = out["nbissues"] + out["nbarticles"] + out["nbcompounds"] + out["title"] = title + render json: out + end + + def remove_selected_documents + @nb_removed_docs = params[:documents_ids].size + puts "Remove selected documents called, session has working dataset with id " + String(session[:working_dataset]) + dataset = Dataset.find(session[:working_dataset]) + dataset.remove_documents params[:documents_ids] + redirect_to action: "show", id: dataset.id + end + + def add_all_documents + SearchToDatasetWorker.perform_async(current_user.id, session[:working_dataset], params[:search_params].to_unsafe_h) + title = Dataset.find(session[:working_dataset]).title + message = "

Documents are being added to your dataset. You will be notified when the operation is done.

" + render partial: "shared/notification", locals: { notif_title: title, notif_content: message.html_safe } + end + + def export_dataset + ExportDatasetWorker.perform_async(current_user.id, params[:dataset_id], params[:export_type]) + title = Dataset.find(params[:dataset_id]).title + message = "

The export is being prepared. You will be notified when the operation is done.

" + render partial: "shared/notification", locals: { notif_title: title, notif_content: message.html_safe } + end + + def toggle_sharing_status + @dataset = Dataset.find(params[:dataset_id]) + @dataset.toggle!(:public) + render partial: "dataset_info" + end + + def paginate + out = {} + d = Dataset.find params["id"] + rows = params[:per_page].to_i + res = d.fetch_paginated_documents(params[:page].to_i, rows, params[:sort], params[:sort_order], params[:type]) + out[:documents] = render_to_string(layout: false, + partial: "documents", + locals: { docs: res[:docs], rows: rows, pagenum: params[:page].to_i }) + out[:pagination] = render_to_string(layout: false, + partial: "pagination", + locals: { nb_pages: res[:nb_pages].to_i, current_page: params[:page].to_i }) + render json: out + end + + def list_datasets + render json: current_user.datasets.to_json + end end diff --git a/app/controllers/experiment_controller.rb b/app/controllers/experiment_controller.rb index 5c89c48..61d9bce 100644 --- a/app/controllers/experiment_controller.rb +++ b/app/controllers/experiment_controller.rb @@ -1,118 +1,117 @@ class ExperimentController < ApplicationController + before_action :authenticate_user! - before_action :authenticate_user! + def index + end - def index + def create + experiment = Experiment.new + experiment.user = current_user + experiment.title = params[:title] + begin + experiment.save! + render json: { status: "ok" } + rescue ActiveRecord::RecordNotUnique + render json: { status: "error", message: "An experiment with this title already exists." } + rescue ActiveRecord::RecordInvalid + render json: { status: "error", message: "The title should not be blank." } end + end - def create - experiment = Experiment.new - experiment.user = current_user - experiment.title = params[:title] - begin - experiment.save! 
- render json: {status: 'ok'} - rescue ActiveRecord::RecordNotUnique - render json: {status: "error", message: "An experiment with this title already exists."} - rescue ActiveRecord::RecordInvalid - render json: {status: "error", message: "The title should not be blank."} - end + def delete + experiment = Experiment.find(params[:experiment_id]) + root_ids = experiment.description["children"].map { |root| root["tool"]["id"] } + root_ids.each do |root_id| + Tool.destroy(experiment.delete_tool(root_id)) end + experiment.destroy + end - def delete - experiment = Experiment.find(params[:experiment_id]) - root_ids = experiment.description["children"].map{|root| root['tool']['id'] } - root_ids.each do |root_id| - Tool.destroy(experiment.delete_tool(root_id)) - end - experiment.destroy - end - - def show - @experiment = Experiment.find params[:id] - @tools = @experiment.load_tools - @tools = JSON.parse(File.read("#{Rails.root}/lib/newspapers_tools.json")) - @tools['tools']['processors'].delete_if{ |h| h["type"] == "splitter" } - end + def show + @experiment = Experiment.find params[:id] + @tools = @experiment.load_tools + @tools = JSON.parse(File.read("#{Rails.root}/lib/newspapers_tools.json")) + @tools["tools"]["processors"].delete_if { |h| h["type"] == "splitter" } + end - def update_experiments_list - respond_to do |format| - format.js - end + def update_experiments_list + respond_to do |format| + format.js end + end - def add_tool - @experiment = Experiment.find(params[:id]) - tool_params = JSON.parse params[:tool] - tool = Tool.new - tool.tool_type = tool_params['type'] - tool.input_type = tool_params['input_type'] - tool.output_type = tool_params['output_type'] - tool.parameters = tool_params['parameters'] - tool.status = "created" - tool.parent_id = params[:parent_id]#(params[:parent_id] == "") ? nil : Tool.find(params[:parent_id]) - tool.experiment = @experiment - tool.save! - @experiment.add_tool(params[:parent_id].to_i, tool) - @experiment.save! - render 'experiment/update_experiment_area' - end + def add_tool + @experiment = Experiment.find(params[:id]) + tool_params = JSON.parse params[:tool] + tool = Tool.new + tool.tool_type = tool_params["type"] + tool.input_type = tool_params["input_type"] + tool.output_type = tool_params["output_type"] + tool.parameters = tool_params["parameters"] + tool.status = "created" + tool.parent_id = params[:parent_id] #(params[:parent_id] == "") ? nil : Tool.find(params[:parent_id]) + tool.experiment = @experiment + tool.save! + @experiment.add_tool(params[:parent_id].to_i, tool) + @experiment.save! + render "experiment/update_experiment_area" + end - def delete_tool - @experiment = Experiment.find(params[:id]) - tools_to_destroy_ids = @experiment.delete_tool(params[:tool_id].to_i) - @experiment.save! - Tool.destroy(tools_to_destroy_ids) - render 'experiment/update_experiment_area' - end + def delete_tool + @experiment = Experiment.find(params[:id]) + tools_to_destroy_ids = @experiment.delete_tool(params[:tool_id].to_i) + @experiment.save! + Tool.destroy(tools_to_destroy_ids) + render "experiment/update_experiment_area" + end - def edit_tool_form - @tool = Tool.find(params[:tool_id]) - render partial: 'tool/parameters', locals: {tool: @tool} - end + def edit_tool_form + @tool = Tool.find(params[:tool_id]) + render partial: "tool/parameters", locals: { tool: @tool } + end - def edit_tool - @experiment = Experiment.find(params[:id]) - @tool = Tool.find(params[:tool_id]) - modified = false - @tool.parameters.map! 
do |param| - if param['value'] != params[:parameters][param['name']] - modified = true - end - param['value'] = params[:parameters][param['name']] - param - end - @tool.status = "configured" if modified - @tool.save! - render 'experiment/update_experiment_area' + def edit_tool + @experiment = Experiment.find(params[:id]) + @tool = Tool.find(params[:tool_id]) + modified = false + @tool.parameters.map! do |param| + if param["value"] != params[:parameters][param["name"]] + modified = true + end + param["value"] = params[:parameters][param["name"]] + param end + @tool.status = "configured" if modified + @tool.save! + render "experiment/update_experiment_area" + end - def tool_results - @experiment = Experiment.find(params[:id]) - @tool = Tool.find(params[:tool_id]) - render partial: 'tool/results', locals: {tool: @tool, experiment: @experiment} - end + def tool_results + @experiment = Experiment.find(params[:id]) + @tool = Tool.find(params[:tool_id]) + render partial: "tool/results", locals: { tool: @tool, experiment: @experiment } + end - def run_tool - @experiment = Experiment.find(params[:id]) - @tool = Tool.find(params[:tool_id]) - @tool.run() - render 'experiment/update_experiment_area' - end + def run_tool + @experiment = Experiment.find(params[:id]) + @tool = Tool.find(params[:tool_id]) + @tool.run() + render "experiment/update_experiment_area" + end - def run_experiment - out = {} - @experiment = Experiment.find(params[:experiment_id]) - ids = @experiment.get_tool_ids - running = false - ids.map{|id| Tool.find(id)}.each do |tool| - if tool.runnable? - tool.run(true) - running = true - end - end - out[:html_tree] = render_to_string partial: "tree", locals: {experiment: @experiment} - out[:experiment_running] = running - render json: out + def run_experiment + out = {} + @experiment = Experiment.find(params[:experiment_id]) + ids = @experiment.get_tool_ids + running = false + ids.map { |id| Tool.find(id) }.each do |tool| + if tool.runnable? + tool.run(true) + running = true + end end + out[:html_tree] = render_to_string partial: "tree", locals: { experiment: @experiment } + out[:experiment_running] = running + render json: out + end end diff --git a/app/controllers/notification_controller.rb b/app/controllers/notification_controller.rb index 53306f0..43e7a44 100644 --- a/app/controllers/notification_controller.rb +++ b/app/controllers/notification_controller.rb @@ -1,3 +1,3 @@ class NotificationController < ApplicationController - -end \ No newline at end of file + before_action :authenticate_user! +end diff --git a/app/controllers/tool_controller.rb b/app/controllers/tool_controller.rb index d9fbf32..2e047a9 100644 --- a/app/controllers/tool_controller.rb +++ b/app/controllers/tool_controller.rb @@ -1,26 +1,21 @@ class ToolController < ApplicationController + before_action :authenticate_user! - before_action :authenticate_user! 
+ def show + end - def show + def create + end - end + def update + end - def create + def destroy + end - end + private - def update - - end - - def destroy - - end - - private - - def tool_params - params.require(:tool).permit(:parameters, :results, :status) - end + def tool_params + params.require(:tool).permit(:parameters, :results, :status) + end end diff --git a/app/helpers/application_helper.rb b/app/helpers/application_helper.rb index a69ed06..15f2511 100644 --- a/app/helpers/application_helper.rb +++ b/app/helpers/application_helper.rb @@ -1,7 +1,5 @@ module ApplicationHelper - - def set_page_title(title) - content_for :page_title, title - end - + def set_page_title(title) + content_for :page_title, title + end end diff --git a/app/helpers/experiment_helper.rb b/app/helpers/experiment_helper.rb index f03f142..9bc2e4c 100644 --- a/app/helpers/experiment_helper.rb +++ b/app/helpers/experiment_helper.rb @@ -1,17 +1,15 @@ module ExperimentHelper - - def recursive_display(tree, tools) - if tree.has_key? "tool" - concat "
  • ".html_safe - concat render partial: 'tool/canvas_tool', locals: {tool: tools[tree['tool']['id']]} - concat "".html_safe - concat "
  • ".html_safe if tree.has_key? "tool" + def recursive_display(tree, tools) + if tree.has_key? "tool" + concat "
  • ".html_safe + concat render partial: "tool/canvas_tool", locals: { tool: tools[tree["tool"]["id"]] } + concat "".html_safe + concat "
  • ".html_safe if tree.has_key? "tool" + end +end diff --git a/app/helpers/named_entities_helper.rb b/app/helpers/named_entities_helper.rb index 0419623..e9c2536 100644 --- a/app/helpers/named_entities_helper.rb +++ b/app/helpers/named_entities_helper.rb @@ -1,40 +1,38 @@ module NamedEntitiesHelper - - def get_linked_entities entities - priority_language = [I18n.locale, 'en', 'de', 'fr', 'fi', 'sv'] - ids = entities.select{ |label| label != "" && label != nil } - return {} if ids.empty? - out = {} - SolrSearcher.query({q: "*:*", fq: "id:(#{ids.join(' ')})", fl: "*", rows: 99999})['response']['docs'].map do |res| - priority_language.each do |lang| - unless res["label_#{lang}_ssi"].nil? - out[res['id']] = {kb_url: res['kb_url_ssi'], label: res["label_#{lang}_ssi"]} - break - end - end + def get_linked_entities(entities) + priority_language = [I18n.locale, "en", "de", "fr", "fi", "sv"] + ids = entities.select { |label| label != "" && label != nil } + return {} if ids.empty? + out = {} + SolrSearcher.query({ q: "*:*", fq: "id:(#{ids.join(" ")})", fl: "*", rows: 99999 })["response"]["docs"].map do |res| + priority_language.each do |lang| + unless res["label_#{lang}_ssi"].nil? + out[res["id"]] = { kb_url: res["kb_url_ssi"], label: res["label_#{lang}_ssi"] } + break end - out + end end + out + end - def get_entity_label(options={}) - priority_language = [I18n.locale, 'en', 'de', 'fr', 'fi', 'sv'] - if options.class == Array - out = {} - unless options.empty? - docs = SolrSearcher.query({q: "*:*", fq: "id:(#{options.join(' ')})", fl: "*", rows: 99999})['response']['docs'] - docs.map do |doc| - priority_language.each do |lang| - unless doc["label_#{lang}_ssi"].nil? - out[doc['id']] = doc["label_#{lang}_ssi"] - break - end - end - end + def get_entity_label(options = {}) + priority_language = [I18n.locale, "en", "de", "fr", "fi", "sv"] + if options.class == Array + out = {} + unless options.empty? + docs = SolrSearcher.query({ q: "*:*", fq: "id:(#{options.join(" ")})", fl: "*", rows: 99999 })["response"]["docs"] + docs.map do |doc| + priority_language.each do |lang| + unless doc["label_#{lang}_ssi"].nil? 
+ out[doc["id"]] = doc["label_#{lang}_ssi"] + break end - return out - else - @entities_labels[options] # set in catalog_controller#index + end end + end + return out + else + @entities_labels[options] # set in catalog_controller#index end - -end \ No newline at end of file + end +end diff --git a/app/helpers/search_helper.rb b/app/helpers/search_helper.rb index d740570..e9eb2ac 100644 --- a/app/helpers/search_helper.rb +++ b/app/helpers/search_helper.rb @@ -1,37 +1,35 @@ module SearchHelper + def current_page_params + params.to_unsafe_h.slice("q", "page", "per_page", "sort", "search_type", "f") + end - def current_page_params - params.to_unsafe_h.slice('q', 'page', 'per_page','sort', 'search_type', 'f') + def merge_facets(parameters, new) + parameters.merge(new) do |key, oldval, newval| + oldval.merge(newval) end + end - def merge_facets(parameters, new) - parameters.merge(new) do |key, oldval, newval| - oldval.merge(newval) - end - end - - def convert_solr_date_to_datepicker_date solr_date - DateTime.parse(solr_date).strftime("%Y-%m-%d") - end + def convert_solr_date_to_datepicker_date(solr_date) + DateTime.parse(solr_date).strftime("%Y-%m-%d") + end - def convert_datepicker_date_to_solr_date solr_date - DateTime.parse(solr_date).strftime("%Y-%m-%d") - end + def convert_datepicker_date_to_solr_date(solr_date) + DateTime.parse(solr_date).strftime("%Y-%m-%d") + end - def search_constraints - constraints = [] - if current_page_params[:f] - current_page_params[:f].each do |f, vals| - if f == "date_created_dtsi" - constraints << {label: f, value: "From #{vals['from']} To #{vals['to']}"} - else - vals.each do |val| - constraints << {label: f, value: val} - end - end - end + def search_constraints + constraints = [] + if current_page_params[:f] + current_page_params[:f].each do |f, vals| + if f == "date_created_dtsi" + constraints << { label: f, value: "From #{vals["from"]} To #{vals["to"]}" } + else + vals.each do |val| + constraints << { label: f, value: val } + end end - constraints + end end - -end \ No newline at end of file + constraints + end +end diff --git a/app/mailers/application_mailer.rb b/app/mailers/application_mailer.rb index 286b223..3c34c81 100644 --- a/app/mailers/application_mailer.rb +++ b/app/mailers/application_mailer.rb @@ -1,4 +1,4 @@ class ApplicationMailer < ActionMailer::Base - default from: 'from@example.com' - layout 'mailer' + default from: "from@example.com" + layout "mailer" end diff --git a/app/models/article.rb b/app/models/article.rb index 145c405..27451ce 100644 --- a/app/models/article.rb +++ b/app/models/article.rb @@ -1,100 +1,99 @@ class Article + attr_accessor :id, :title, :all_text, :date_created, :language, :canvases_parts, :newspaper, :issue_id, :thumbnail_url, :bbox - attr_accessor :id, :title, :all_text, :date_created, :language, :canvases_parts, :newspaper, :issue_id, :thumbnail_url, :bbox + def self.from_solr(id) + solr_doc = SolrSearcher.get_doc_by_id id + Article.from_solr_doc solr_doc + end - def self.from_solr id - solr_doc = SolrSearcher.get_doc_by_id id - Article.from_solr_doc solr_doc - end + def self.from_solr_doc(solr_doc) + a = Article.new + a.id = solr_doc["id"] + a.title = solr_doc["title_ssi"] + a.language = solr_doc["language_ssi"] + a.all_text = solr_doc["all_text_t#{a.language}_siv"] + a.date_created = solr_doc["date_created_ssi"] + a.issue_id = solr_doc["from_issue_ssi"] + a.newspaper = solr_doc["member_of_collection_ids_ssim"].first + a.thumbnail_url = solr_doc["thumbnail_url_ss"] + a.canvases_parts = 
solr_doc["canvases_parts_ssm"] + a.bbox = a.get_location + a + end - def self.from_solr_doc solr_doc - a = Article.new - a.id = solr_doc['id'] - a.title = solr_doc['title_ssi'] - a.language = solr_doc['language_ssi'] - a.all_text = solr_doc["all_text_t#{a.language}_siv"] - a.date_created = solr_doc['date_created_ssi'] - a.issue_id = solr_doc['from_issue_ssi'] - a.newspaper = solr_doc['member_of_collection_ids_ssim'].first - a.thumbnail_url = solr_doc['thumbnail_url_ss'] - a.canvases_parts = solr_doc['canvases_parts_ssm'] - a.bbox = a.get_location - a - end + def to_solr(page_iiif_url) + solr_doc = {} + solr_doc["id"] = self.id + solr_doc["title_ssi"] = self.title + solr_doc["language_ssi"] = self.language + solr_doc["all_text_t#{self.language}_siv"] = self.all_text + solr_doc["all_text_unstemmed_t#{self.language}_siv"] = self.all_text + solr_doc["date_created_ssi"] = self.date_created + solr_doc["date_created_dtsi"] = DateTime.parse(self.date_created).strftime("%Y-%m-%dT%H:%M:%SZ") + solr_doc["year_isi"] = solr_doc["date_created_ssi"][0..3].to_i + d = DateTime.parse solr_doc["date_created_dtsi"] + solr_doc["month_isi"] = d.month + solr_doc["day_isi"] = d.wday + solr_doc["from_issue_ssi"] = self.issue_id + solr_doc["member_of_collection_ids_ssim"] = self.newspaper + solr_doc["canvases_parts_ssm"] = self.canvases_parts + solr_doc["thumbnail_url_ss"] = self.get_iiif_url(page_iiif_url) + solr_doc["has_model_ssim"] = "Article" + solr_doc + end - def to_solr(page_iiif_url) - solr_doc = {} - solr_doc['id'] = self.id - solr_doc['title_ssi'] = self.title - solr_doc["language_ssi"] = self.language - solr_doc["all_text_t#{self.language}_siv"] = self.all_text - solr_doc["all_text_unstemmed_t#{self.language}_siv"] = self.all_text - solr_doc['date_created_ssi'] = self.date_created - solr_doc['date_created_dtsi'] = DateTime.parse(self.date_created).strftime('%Y-%m-%dT%H:%M:%SZ') - solr_doc['year_isi'] = solr_doc['date_created_ssi'][0..3].to_i - d = DateTime.parse solr_doc["date_created_dtsi"] - solr_doc['month_isi'] = d.month - solr_doc['day_isi'] = d.wday - solr_doc['from_issue_ssi'] = self.issue_id - solr_doc['member_of_collection_ids_ssim'] = self.newspaper - solr_doc['canvases_parts_ssm'] = self.canvases_parts - solr_doc['thumbnail_url_ss'] = self.get_iiif_url(page_iiif_url) - solr_doc['has_model_ssim'] = 'Article' - solr_doc + def get_thumbnail + if Rails.configuration.iiif_sources[:local].include? self.newspaper + pagenum = self.canvases_parts[0][self.canvases_parts[0].rindex("_") + 1...self.canvases_parts[0].rindex("#")].to_i + self.get_iiif_url("https://iiif.newseye.eu/iiif/#{self.newspaper}/#{self.issue_id}_page_#{pagenum}.ptif") + elsif Rails.configuration.iiif_sources[:external].include? self.newspaper + self.thumbnail_url + elsif Rails.configuration.iiif_sources[:external_onb].include? self.newspaper + self.thumbnail_url end + end - def get_thumbnail - if Rails.configuration.iiif_sources[:local].include? self.newspaper - pagenum = self.canvases_parts[0][self.canvases_parts[0].rindex('_')+1...self.canvases_parts[0].rindex('#')].to_i - self.get_iiif_url("https://iiif.newseye.eu/iiif/#{self.newspaper}/#{self.issue_id}_page_#{pagenum}.ptif") - elsif Rails.configuration.iiif_sources[:external].include? self.newspaper - self.thumbnail_url - elsif Rails.configuration.iiif_sources[:external_onb].include? 
self.newspaper - self.thumbnail_url - end - end + def get_location + coords = self.canvases_parts.map { |c| c[c.rindex("#xywh=") + 6..-1].split(",").map(&:to_i) } + min_x = coords.map { |coord| coord[0] }.min + max_x = coords.map { |coord| coord[0] + coord[2] }.max + min_y = coords.map { |coord| coord[1] }.min + max_y = coords.map { |coord| coord[1] + coord[3] }.max + canvas_coords = [min_x, max_x, min_y, max_y] + canvas_size = [canvas_coords[1] - canvas_coords[0], canvas_coords[3] - canvas_coords[2]] + [min_x, min_y, canvas_size[0], canvas_size[1]] + end - def get_location - coords = self.canvases_parts.map { |c| c[c.rindex('#xywh=')+6..-1].split(',').map(&:to_i) } - min_x = coords.map{ |coord| coord[0] }.min - max_x = coords.map{ |coord| coord[0] + coord[2] }.max - min_y = coords.map{ |coord| coord[1] }.min - max_y = coords.map{ |coord| coord[1] + coord[3] }.max - canvas_coords = [min_x, max_x, min_y, max_y] - canvas_size = [canvas_coords[1]-canvas_coords[0], canvas_coords[3]-canvas_coords[2]] - [min_x,min_y,canvas_size[0],canvas_size[1]] - end + def get_iiif_url(page_iiif_url) + canvas_url = self.canvases_parts[0] + coords = self.canvases_parts.map { |c| c[c.rindex("#xywh=") + 6..-1].split(",").map(&:to_i) } + min_x = coords.map { |coord| coord[0] }.min + max_x = coords.map { |coord| coord[0] + coord[2] }.max + min_y = coords.map { |coord| coord[1] }.min + max_y = coords.map { |coord| coord[1] + coord[3] }.max + pagenum = canvas_url[canvas_url.rindex("_") + 1...canvas_url.rindex("#")].to_i + "#{page_iiif_url}/#{min_x},#{min_y},#{max_x - min_x},#{max_y - min_y}/full/0/default.jpg" + end - def get_iiif_url(page_iiif_url) - canvas_url = self.canvases_parts[0] - coords = self.canvases_parts.map { |c| c[c.rindex('#xywh=')+6..-1].split(',').map(&:to_i) } - min_x = coords.map{ |coord| coord[0] }.min - max_x = coords.map{ |coord| coord[0] + coord[2] }.max - min_y = coords.map{ |coord| coord[1] }.min - max_y = coords.map{ |coord| coord[1] + coord[3] }.max - pagenum = canvas_url[canvas_url.rindex('_')+1...canvas_url.rindex('#')].to_i - "#{page_iiif_url}/#{min_x},#{min_y},#{max_x-min_x},#{max_y-min_y}/full/0/default.jpg" + def self.named_entities(article_id) + nems = SolrSearcher.query({ q: "article_id_ssi:#{article_id}", rows: 1000000 })["response"]["docs"] + output = { LOC: {}, PER: {}, ORG: {}, HumanProd: {} } + nems.select { |ne_solr| ne_solr["type_ssi"] == "LOC" }.each do |ne_solr| + output[:LOC][ne_solr["linked_entity_ssi"]] = [] unless output[:LOC].has_key? ne_solr["linked_entity_ssi"] + output[:LOC][ne_solr["linked_entity_ssi"]].append(ne_solr) end - - def self.named_entities(article_id) - nems = SolrSearcher.query({q:"article_id_ssi:#{article_id}", rows: 1000000})['response']['docs'] - output = {LOC: {}, PER: {}, ORG: {}, HumanProd: {}} - nems.select {|ne_solr| ne_solr['type_ssi'] == "LOC"}.each do |ne_solr| - output[:LOC][ne_solr['linked_entity_ssi']] = [] unless output[:LOC].has_key? ne_solr['linked_entity_ssi'] - output[:LOC][ne_solr['linked_entity_ssi']].append(ne_solr) - end - nems.select {|ne_solr| ne_solr['type_ssi'] == "PER"}.each do |ne_solr| - output[:PER][ne_solr['linked_entity_ssi']] = [] unless output[:PER].has_key? ne_solr['linked_entity_ssi'] - output[:PER][ne_solr['linked_entity_ssi']].append(ne_solr) - end - nems.select {|ne_solr| ne_solr['type_ssi'] == "ORG"}.each do |ne_solr| - output[:ORG][ne_solr['linked_entity_ssi']] = [] unless output[:ORG].has_key? 
ne_solr['linked_entity_ssi'] - output[:ORG][ne_solr['linked_entity_ssi']].append(ne_solr) - end - nems.select {|ne_solr| ne_solr['type_ssi'] == "HumanProd"}.each do |ne_solr| - output[:HumanProd][ne_solr['linked_entity_ssi']] = [] unless output[:HumanProd].has_key? ne_solr['linked_entity_ssi'] - output[:HumanProd][ne_solr['linked_entity_ssi']].append(ne_solr) - end - output + nems.select { |ne_solr| ne_solr["type_ssi"] == "PER" }.each do |ne_solr| + output[:PER][ne_solr["linked_entity_ssi"]] = [] unless output[:PER].has_key? ne_solr["linked_entity_ssi"] + output[:PER][ne_solr["linked_entity_ssi"]].append(ne_solr) + end + nems.select { |ne_solr| ne_solr["type_ssi"] == "ORG" }.each do |ne_solr| + output[:ORG][ne_solr["linked_entity_ssi"]] = [] unless output[:ORG].has_key? ne_solr["linked_entity_ssi"] + output[:ORG][ne_solr["linked_entity_ssi"]].append(ne_solr) + end + nems.select { |ne_solr| ne_solr["type_ssi"] == "HumanProd" }.each do |ne_solr| + output[:HumanProd][ne_solr["linked_entity_ssi"]] = [] unless output[:HumanProd].has_key? ne_solr["linked_entity_ssi"] + output[:HumanProd][ne_solr["linked_entity_ssi"]].append(ne_solr) end -end \ No newline at end of file + output + end +end diff --git a/app/models/compound_article.rb b/app/models/compound_article.rb index 5372b31..dec0311 100644 --- a/app/models/compound_article.rb +++ b/app/models/compound_article.rb @@ -1,6 +1,4 @@ class CompoundArticle < ActiveRecord::Base - - belongs_to :user, optional: false - validates :title, length: { minimum: 1 } - -end \ No newline at end of file + belongs_to :user, optional: false + validates :title, length: { minimum: 1 } +end diff --git a/app/models/concerns/abstract_searcher.rb b/app/models/concerns/abstract_searcher.rb index 0bced80..d569b1d 100644 --- a/app/models/concerns/abstract_searcher.rb +++ b/app/models/concerns/abstract_searcher.rb @@ -1,12 +1,11 @@ module AbstractSearcher - extend ActiveSupport::Concern + extend ActiveSupport::Concern - def self.query - raise NotImplementedError, "Subclasses must define `query`." - end + def self.query + raise NotImplementedError, "Subclasses must define `query`." + end - def self.get_doc_by_id(id) - raise NotImplementedError, "Subclasses must define `get_doc_by_id`." - end - -end \ No newline at end of file + def self.get_doc_by_id(id) + raise NotImplementedError, "Subclasses must define `get_doc_by_id`." + end +end diff --git a/app/models/current.rb b/app/models/current.rb index 1211373..73a9744 100644 --- a/app/models/current.rb +++ b/app/models/current.rb @@ -1,3 +1,3 @@ class Current < ActiveSupport::CurrentAttributes - attribute :user + attribute :user end diff --git a/app/models/dataset.rb b/app/models/dataset.rb index 59b49ae..be88129 100644 --- a/app/models/dataset.rb +++ b/app/models/dataset.rb @@ -1,122 +1,121 @@ class Dataset < ActiveRecord::Base - # after_find :nb_issues, :nb_articles - belongs_to :user, optional: false - validates :title, length: { minimum: 1 } + # after_find :nb_issues, :nb_articles + belongs_to :user, optional: false + validates :title, length: { minimum: 1 } - def add_documents(documents_ids) - existing = [] - documents_ids.each do |doc_id| - if self.documents.any?{ |doc| doc['id'] == doc_id } - existing << doc_id - else - doc_type = doc_id.index("_article_").nil? ? 
"issue" : "article" - self.documents << {id: doc_id, type: doc_type} - end - end - self.save - return existing - end - - def add_compound(compound_id) - existing = [] - if self.documents.any?{ |doc| doc['id'] == compound_id } - existing << compound_id - else - doc_type = "compound" - self.documents << {id: compound_id, type: doc_type} - end - self.save - return existing + def add_documents(documents_ids) + existing = [] + documents_ids.each do |doc_id| + if self.documents.any? { |doc| doc["id"] == doc_id } + existing << doc_id + else + doc_type = doc_id.index("_article_").nil? ? "issue" : "article" + self.documents << { id: doc_id, type: doc_type } + end end + self.save + return existing + end - def remove_documents(documents_ids) - self.documents.delete_if{ |elt| documents_ids.include? elt['id'] } - self.save + def add_compound(compound_id) + existing = [] + if self.documents.any? { |doc| doc["id"] == compound_id } + existing << compound_id + else + doc_type = "compound" + self.documents << { id: compound_id, type: doc_type } end + self.save + return existing + end - def contains doc_id - self.documents.index { |doc| doc['id'] == doc_id }.nil? ? false : true - end + def remove_documents(documents_ids) + self.documents.delete_if { |elt| documents_ids.include? elt["id"] } + self.save + end - def nb_issues - self.documents.select do |doc| - doc['type'] == 'issue' - end.size - end + def contains(doc_id) + self.documents.index { |doc| doc["id"] == doc_id }.nil? ? false : true + end - def nb_articles - self.documents.select do |doc| - doc['type'] == 'article' - end.size - end + def nb_issues + self.documents.select do |doc| + doc["type"] == "issue" + end.size + end - def nb_compound_articles - self.documents.select do |doc| - doc['type'] == 'compound' - end.size - end + def nb_articles + self.documents.select do |doc| + doc["type"] == "article" + end.size + end - def fetch_paginated_documents(page, per_page, sort, sort_order, type, recursive=false) - docs = self.documents.select {|doc| type == "all" || doc['type'] == type } + def nb_compound_articles + self.documents.select do |doc| + doc["type"] == "compound" + end.size + end - nb_pages = (docs.size / per_page.to_f).ceil - nb_pages = 1 if nb_pages == 0 - sort = (sort == "default") ? "score" : sort - solr_docs = nil + def fetch_paginated_documents(page, per_page, sort, sort_order, type, recursive = false) + docs = self.documents.select { |doc| type == "all" || doc["type"] == type } - compounds_ids = docs.select{|d| d['type'] == "compound" }.map{ |d| d['id'] } - compound_articles = CompoundArticle.find(compounds_ids) + nb_pages = (docs.size / per_page.to_f).ceil + nb_pages = 1 if nb_pages == 0 + sort = (sort == "default") ? "score" : sort + solr_docs = nil + compounds_ids = docs.select { |d| d["type"] == "compound" }.map { |d| d["id"] } + compound_articles = CompoundArticle.find(compounds_ids) - solr_ids = docs.select{|d| d['type'] != "compound" }.map{ |d| d['id'] } - unless solr_ids.empty? - solr_docs = SolrSearcher.query({ + solr_ids = docs.select { |d| d["type"] != "compound" }.map { |d| d["id"] } + unless solr_ids.empty? + solr_docs = SolrSearcher.query({ q: "*:*", - fq: "id:(#{solr_ids.join(' ')})", + fq: "id:(#{solr_ids.join(" ")})", rows: per_page, sort: "#{sort} #{sort_order}", - start: (page-1)*per_page - })['response']['docs'] - solr_docs.map! do |solr_doc| - if solr_doc['id'].index("_article_").nil? 
- Issue.from_solr_doc solr_doc - else - Article.from_solr_doc solr_doc - end - end - end - if recursive and page < nb_pages and !solr_docs.nil? - solr_docs = solr_docs.concat fetch_paginated_documents(page+1, per_page, sort, sort_order, type, true)[:docs] + start: (page - 1) * per_page, + })["response"]["docs"] + solr_docs.map! do |solr_doc| + if solr_doc["id"].index("_article_").nil? + Issue.from_solr_doc solr_doc + else + Article.from_solr_doc solr_doc end - return {docs: solr_docs.nil? ? compound_articles : solr_docs+compound_articles, nb_pages: nb_pages} + end + end + if recursive and page < nb_pages and !solr_docs.nil? + solr_docs = solr_docs.concat fetch_paginated_documents(page + 1, per_page, sort, sort_order, type, true)[:docs] end + return { docs: solr_docs.nil? ? compound_articles : solr_docs + compound_articles, nb_pages: nb_pages } + end - def named_entities - article_ids = self.documents.select {|d| d['type'] == 'article' }.map{|d| d['id']} - issue_ids = self.documents.select {|d| d['type'] == 'issue' }.map{|d| d['id']} - parts_ids = self.documents.select {|d| d['type'] == 'compound' }.map{|d| CompoundArticle.find(d['id']).parts}.flatten.uniq - nems = [] - nems = SolrSearcher.query({q: "*:*", fq: "article_id_ssi:(#{article_ids.join(' OR ')})", rows: 1000000})['response']['docs'] unless article_ids.empty? - nems += SolrSearcher.query({q: "*:*", fq: "article_id_ssi:(#{parts_ids.join(' OR ')})", rows: 1000000})['response']['docs'] unless parts_ids.empty? - nems += SolrSearcher.query({q: "*:*", fq: "issue_id_ssi:(#{issue_ids.join(' OR ')})", rows: 1000000})['response']['docs'] unless issue_ids.empty? - output = {LOC: {}, PER: {}, ORG: {}, HumanProd: {}} - nems.select {|ne_solr| ne_solr['type_ssi'] == "LOC"}.each do |ne_solr| - output[:LOC][ne_solr['linked_entity_ssi']] = [] unless output[:LOC].has_key? ne_solr['linked_entity_ssi'] - output[:LOC][ne_solr['linked_entity_ssi']].append(ne_solr) - end - nems.select {|ne_solr| ne_solr['type_ssi'] == "PER"}.each do |ne_solr| - output[:PER][ne_solr['linked_entity_ssi']] = [] unless output[:PER].has_key? ne_solr['linked_entity_ssi'] - output[:PER][ne_solr['linked_entity_ssi']].append(ne_solr) - end - nems.select {|ne_solr| ne_solr['type_ssi'] == "ORG"}.each do |ne_solr| - output[:ORG][ne_solr['linked_entity_ssi']] = [] unless output[:ORG].has_key? ne_solr['linked_entity_ssi'] - output[:ORG][ne_solr['linked_entity_ssi']].append(ne_solr) - end - nems.select {|ne_solr| ne_solr['type_ssi'] == "HumanProd"}.each do |ne_solr| - output[:HumanProd][ne_solr['linked_entity_ssi']] = [] unless output[:HumanProd].has_key? ne_solr['linked_entity_ssi'] - output[:HumanProd][ne_solr['linked_entity_ssi']].append(ne_solr) - end - output + def named_entities + article_ids = self.documents.select { |d| d["type"] == "article" }.map { |d| d["id"] } + issue_ids = self.documents.select { |d| d["type"] == "issue" }.map { |d| d["id"] } + parts_ids = self.documents.select { |d| d["type"] == "compound" }.map { |d| CompoundArticle.find(d["id"]).parts }.flatten.uniq + nems = [] + nems = SolrSearcher.query({ q: "*:*", fq: "article_id_ssi:(#{article_ids.join(" OR ")})", rows: 1000000 })["response"]["docs"] unless article_ids.empty? + nems += SolrSearcher.query({ q: "*:*", fq: "article_id_ssi:(#{parts_ids.join(" OR ")})", rows: 1000000 })["response"]["docs"] unless parts_ids.empty? + nems += SolrSearcher.query({ q: "*:*", fq: "issue_id_ssi:(#{issue_ids.join(" OR ")})", rows: 1000000 })["response"]["docs"] unless issue_ids.empty? 
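+    # Group the entity docs first by type, then by linked entity id, e.g.
+    # { LOC: { "<linked_entity_ssi>" => [solr_doc, ...] }, PER: {}, ORG: {}, HumanProd: {} }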
+ output = { LOC: {}, PER: {}, ORG: {}, HumanProd: {} } + nems.select { |ne_solr| ne_solr["type_ssi"] == "LOC" }.each do |ne_solr| + output[:LOC][ne_solr["linked_entity_ssi"]] = [] unless output[:LOC].has_key? ne_solr["linked_entity_ssi"] + output[:LOC][ne_solr["linked_entity_ssi"]].append(ne_solr) + end + nems.select { |ne_solr| ne_solr["type_ssi"] == "PER" }.each do |ne_solr| + output[:PER][ne_solr["linked_entity_ssi"]] = [] unless output[:PER].has_key? ne_solr["linked_entity_ssi"] + output[:PER][ne_solr["linked_entity_ssi"]].append(ne_solr) + end + nems.select { |ne_solr| ne_solr["type_ssi"] == "ORG" }.each do |ne_solr| + output[:ORG][ne_solr["linked_entity_ssi"]] = [] unless output[:ORG].has_key? ne_solr["linked_entity_ssi"] + output[:ORG][ne_solr["linked_entity_ssi"]].append(ne_solr) + end + nems.select { |ne_solr| ne_solr["type_ssi"] == "HumanProd" }.each do |ne_solr| + output[:HumanProd][ne_solr["linked_entity_ssi"]] = [] unless output[:HumanProd].has_key? ne_solr["linked_entity_ssi"] + output[:HumanProd][ne_solr["linked_entity_ssi"]].append(ne_solr) end + output + end end diff --git a/app/models/experiment.rb b/app/models/experiment.rb index 4d46f6b..827b2b2 100644 --- a/app/models/experiment.rb +++ b/app/models/experiment.rb @@ -1,105 +1,103 @@ class Experiment < ActiveRecord::Base + belongs_to :user, optional: false + validates :title, length: { minimum: 1 } - belongs_to :user, optional: false - validates :title, length: { minimum: 1 } - - def add_tool(parent_id, tool) - if parent_id != 0 - self.locate_tool(self.description, parent_id) do |t| - t['children'] << tool.to_h - end - else - self.description['children'] << tool.to_h - end + def add_tool(parent_id, tool) + if parent_id != 0 + self.locate_tool(self.description, parent_id) do |t| + t["children"] << tool.to_h + end + else + self.description["children"] << tool.to_h end + end - def delete_tool(tool_id) - ids = detach_tool(self.description, nil, tool_id) - end + def delete_tool(tool_id) + ids = detach_tool(self.description, nil, tool_id) + end - def load_tools - ids = gather_ids self.description - Tool.where(id: ids).pluck(:id, :status, :tool_type, :input_type, :output_type, :parent_id).map do |t| - [t[0], {id: t[0], status: t[1], type: t[2], input_type: t[3], output_type: t[4], parent_id: t[5]}] - end.to_h - end + def load_tools + ids = gather_ids self.description + Tool.where(id: ids).pluck(:id, :status, :tool_type, :input_type, :output_type, :parent_id).map do |t| + [t[0], { id: t[0], status: t[1], type: t[2], input_type: t[3], output_type: t[4], parent_id: t[5] }] + end.to_h + end - def finished? - tools = self.load_tools - tools.values.all? { |t| t[:status] == "finished" } - end + def finished? + tools = self.load_tools + tools.values.all? { |t| t[:status] == "finished" } + end - def running? - tools = self.load_tools - tools.values.any? { |t| t[:status] == "running" } - end + def running? + tools = self.load_tools + tools.values.any? { |t| t[:status] == "running" } + end - def get_tool_ids - gather_ids self.description - end + def get_tool_ids + gather_ids self.description + end - def continue_from(tool_id) - locate_tool(self.description, tool_id) do |t| - tools_to_start = t['children'].map { |c| c['tool']['id'] } - tools_to_start.each do |tool_id| - tool = Tool.find(tool_id) - tool.run(true) if tool.runnable? 
- end - end + def continue_from(tool_id) + locate_tool(self.description, tool_id) do |t| + tools_to_start = t["children"].map { |c| c["tool"]["id"] } + tools_to_start.each do |tool_id| + tool = Tool.find(tool_id) + tool.run(true) if tool.runnable? + end end + end - private + private - def locate_tool(tree_part, tool_id, &block) - if tree_part.has_key?('tool') - if tree_part['tool']['id'] == tool_id - yield tree_part - return true - else - tree_part['children'].each do |subtree| - return true if locate_tool(subtree, tool_id, &block) - end - end - else - if tree_part['children'].empty? - yield tree_part - end - tree_part['children'].each do |subtree| - return true if locate_tool(subtree, tool_id, &block) - end + def locate_tool(tree_part, tool_id, &block) + if tree_part.has_key?("tool") + if tree_part["tool"]["id"] == tool_id + yield tree_part + return true + else + tree_part["children"].each do |subtree| + return true if locate_tool(subtree, tool_id, &block) end - false + end + else + if tree_part["children"].empty? + yield tree_part + end + tree_part["children"].each do |subtree| + return true if locate_tool(subtree, tool_id, &block) + end end + false + end - def detach_tool(tree, parent_array, tool_id, &block) - if tree.has_key?('tool') - if tree['tool']['id'] == tool_id - ids = gather_ids(tree) - parent_array.delete(tree) unless parent_array.nil? - return ids - else - tree['children'].each do |subtree| - res = detach_tool(subtree, tree['children'], tool_id, &block) - return res unless res.nil? - end - end - else - tree['children'].each do |subtree| - res = detach_tool(subtree, tree['children'], tool_id, &block) - return res unless res.nil? - end + def detach_tool(tree, parent_array, tool_id, &block) + if tree.has_key?("tool") + if tree["tool"]["id"] == tool_id + ids = gather_ids(tree) + parent_array.delete(tree) unless parent_array.nil? + return ids + else + tree["children"].each do |subtree| + res = detach_tool(subtree, tree["children"], tool_id, &block) + return res unless res.nil? end - nil + end + else + tree["children"].each do |subtree| + res = detach_tool(subtree, tree["children"], tool_id, &block) + return res unless res.nil? 
+ end end + nil + end - def gather_ids(tree, ids=[]) - tree['children'].each do |subtree| - ids.concat(gather_ids(subtree)) - end - if tree.has_key?('tool') - ids << tree['tool']['id'] - end - return ids + def gather_ids(tree, ids = []) + tree["children"].each do |subtree| + ids.concat(gather_ids(subtree)) end - + if tree.has_key?("tool") + ids << tree["tool"]["id"] + end + return ids + end end diff --git a/app/models/issue.rb b/app/models/issue.rb index e617541..bb28951 100644 --- a/app/models/issue.rb +++ b/app/models/issue.rb @@ -1,103 +1,102 @@ class Issue + attr_accessor :id, :title, :date_created, :language, :original_uri, :nb_pages, :all_text, :thumbnail_url, :newspaper, :pages, :articles - attr_accessor :id, :title, :date_created, :language, :original_uri, :nb_pages, :all_text, :thumbnail_url, :newspaper, :pages, :articles + def self.from_solr(id, with_pages = false, with_articles = false) + solr_doc = SolrSearcher.get_doc_by_id id + Issue.from_solr_doc(solr_doc, with_pages, with_articles) + end - def self.from_solr(id, with_pages=false, with_articles=false) - solr_doc = SolrSearcher.get_doc_by_id id - Issue.from_solr_doc(solr_doc, with_pages, with_articles) + def self.from_solr_doc(solr_doc, with_pages = false, with_articles = false) + i = Issue.new + i.id = solr_doc["id"] + i.language = solr_doc["language_ssi"] + i.newspaper = solr_doc["member_of_collection_ids_ssim"][0] + i.title = solr_doc["title_ssi"] + i.date_created = solr_doc["date_created_ssi"] + i.original_uri = solr_doc["original_uri_ss"] + i.nb_pages = solr_doc["member_ids_ssim"].size + i.thumbnail_url = solr_doc["thumbnail_url_ss"] + i.all_text = solr_doc["all_text_t#{i.language}_siv"] + if with_pages + i.pages = [] + solr_doc["member_ids_ssim"].each do |pageid| + i.pages << Page.from_solr(pageid) + end end - - def self.from_solr_doc(solr_doc, with_pages=false, with_articles=false) - i = Issue.new - i.id = solr_doc['id'] - i.language = solr_doc['language_ssi'] - i.newspaper = solr_doc['member_of_collection_ids_ssim'][0] - i.title = solr_doc['title_ssi'] - i.date_created = solr_doc['date_created_ssi'] - i.original_uri = solr_doc['original_uri_ss'] - i.nb_pages = solr_doc['member_ids_ssim'].size - i.thumbnail_url = solr_doc['thumbnail_url_ss'] - i.all_text = solr_doc["all_text_t#{i.language}_siv"] - if with_pages - i.pages = [] - solr_doc['member_ids_ssim'].each do |pageid| - i.pages << Page.from_solr(pageid) - end - end - if with_articles - i.articles = [] - articles_docs = SolrSearcher.query({q: "*:*", fq: ["from_issue_ssi:#{i.id}", "has_model_ssim:Article"], fl:"*", rows:10000})['response']['docs'] - articles_docs.each do |articles_doc| - i.articles << Article.from_solr_doc(articles_doc) - end - end - i + if with_articles + i.articles = [] + articles_docs = SolrSearcher.query({ q: "*:*", fq: ["from_issue_ssi:#{i.id}", "has_model_ssim:Article"], fl: "*", rows: 10000 })["response"]["docs"] + articles_docs.each do |articles_doc| + i.articles << Article.from_solr_doc(articles_doc) + end end + i + end - def to_solr - solr_doc = {} - solr_doc['id'] = self.id - solr_doc['has_model_ssim'] = 'Issue' - solr_doc['title_ssi'] = self.title - solr_doc['date_created_ssi'] = self.date_created - solr_doc['date_created_dtsi'] = DateTime.parse(self.date_created).strftime('%Y-%m-%dT%H:%M:%SZ') - solr_doc['language_ssi'] = self.language - solr_doc['original_uri_ss'] = self.original_uri - solr_doc['nb_pages_isi'] = self.nb_pages - solr_doc['thumbnail_url_ss'] = self.thumbnail_url - solr_doc['member_ids_ssim'] = self.pages.map(&:id) - 
solr_doc['year_isi'] = solr_doc['date_created_ssi'][0..3].to_i - d = DateTime.parse solr_doc["date_created_dtsi"] - solr_doc['month_isi'] = d.month - solr_doc['day_isi'] = d.wday - solr_doc["member_of_collection_ids_ssim"] = self.newspaper - solr_doc["all_text_t#{self.language}_siv"] = self.all_text - solr_doc["all_text_unstemmed_t#{self.language}_siv"] = self.all_text - solr_doc - end + def to_solr + solr_doc = {} + solr_doc["id"] = self.id + solr_doc["has_model_ssim"] = "Issue" + solr_doc["title_ssi"] = self.title + solr_doc["date_created_ssi"] = self.date_created + solr_doc["date_created_dtsi"] = DateTime.parse(self.date_created).strftime("%Y-%m-%dT%H:%M:%SZ") + solr_doc["language_ssi"] = self.language + solr_doc["original_uri_ss"] = self.original_uri + solr_doc["nb_pages_isi"] = self.nb_pages + solr_doc["thumbnail_url_ss"] = self.thumbnail_url + solr_doc["member_ids_ssim"] = self.pages.map(&:id) + solr_doc["year_isi"] = solr_doc["date_created_ssi"][0..3].to_i + d = DateTime.parse solr_doc["date_created_dtsi"] + solr_doc["month_isi"] = d.month + solr_doc["day_isi"] = d.wday + solr_doc["member_of_collection_ids_ssim"] = self.newspaper + solr_doc["all_text_t#{self.language}_siv"] = self.all_text + solr_doc["all_text_unstemmed_t#{self.language}_siv"] = self.all_text + solr_doc + end - def get_thumbnail - if Rails.configuration.iiif_sources[:local].include? self.newspaper - "https://iiif.newseye.eu/iiif/#{self.newspaper}/#{self.id}_page_1.ptif/full/200,/0/default.jpg" - elsif Rails.configuration.iiif_sources[:external].include? self.newspaper - iiif_pages = self.pages.map{ |p| "#{p.iiif_url}/info.json" } # to change - elsif Rails.configuration.iiif_sources[:external_onb].include? self.newspaper - iiif_pages = self.pages.map{ |p| "#{p.iiif_url}/info.json" } # to change - end + def get_thumbnail + if Rails.configuration.iiif_sources[:local].include? self.newspaper + "https://iiif.newseye.eu/iiif/#{self.newspaper}/#{self.id}_page_1.ptif/full/200,/0/default.jpg" + elsif Rails.configuration.iiif_sources[:external].include? self.newspaper + iiif_pages = self.pages.map { |p| "#{p.iiif_url}/info.json" } # to change + elsif Rails.configuration.iiif_sources[:external_onb].include? self.newspaper + iiif_pages = self.pages.map { |p| "#{p.iiif_url}/info.json" } # to change end + end - def get_iiif_urls - if Rails.configuration.iiif_sources[:local].include? self.newspaper - iiif_pages = self.pages.map do |p| - "https://iiif.newseye.eu/iiif/#{self.newspaper}/#{self.id}_page_#{p.page_number}.ptif/info.json" - end - elsif Rails.configuration.iiif_sources[:external].include? self.newspaper - iiif_pages = self.pages.map{ |p| "#{p.iiif_url}/info.json" } - elsif Rails.configuration.iiif_sources[:external_onb].include? self.newspaper - iiif_pages = self.pages.map{ |p| "#{p.iiif_url}/info.json" } - end - iiif_pages + def get_iiif_urls + if Rails.configuration.iiif_sources[:local].include? self.newspaper + iiif_pages = self.pages.map do |p| + "https://iiif.newseye.eu/iiif/#{self.newspaper}/#{self.id}_page_#{p.page_number}.ptif/info.json" + end + elsif Rails.configuration.iiif_sources[:external].include? self.newspaper + iiif_pages = self.pages.map { |p| "#{p.iiif_url}/info.json" } + elsif Rails.configuration.iiif_sources[:external_onb].include? 
self.newspaper + iiif_pages = self.pages.map { |p| "#{p.iiif_url}/info.json" } end + iiif_pages + end - def self.named_entities(issue_id) - nems = SolrSearcher.query({q:"issue_id_ssi:#{issue_id}", rows: 1000000})['response']['docs'] - output = {LOC: {}, PER: {}, ORG: {}, HumanProd: {}} - nems.select {|ne_solr| ne_solr['type_ssi'] == "LOC"}.each do |ne_solr| - output[:LOC][ne_solr['linked_entity_ssi']] = [] unless output[:LOC].has_key? ne_solr['linked_entity_ssi'] - output[:LOC][ne_solr['linked_entity_ssi']].append(ne_solr) - end - nems.select {|ne_solr| ne_solr['type_ssi'] == "PER"}.each do |ne_solr| - output[:PER][ne_solr['linked_entity_ssi']] = [] unless output[:PER].has_key? ne_solr['linked_entity_ssi'] - output[:PER][ne_solr['linked_entity_ssi']].append(ne_solr) - end - nems.select {|ne_solr| ne_solr['type_ssi'] == "ORG"}.each do |ne_solr| - output[:ORG][ne_solr['linked_entity_ssi']] = [] unless output[:ORG].has_key? ne_solr['linked_entity_ssi'] - output[:ORG][ne_solr['linked_entity_ssi']].append(ne_solr) - end - nems.select {|ne_solr| ne_solr['type_ssi'] == "HumanProd"}.each do |ne_solr| - output[:HumanProd][ne_solr['linked_entity_ssi']] = [] unless output[:HumanProd].has_key? ne_solr['linked_entity_ssi'] - output[:HumanProd][ne_solr['linked_entity_ssi']].append(ne_solr) - end - output + def self.named_entities(issue_id) + nems = SolrSearcher.query({ q: "issue_id_ssi:#{issue_id}", rows: 1000000 })["response"]["docs"] + output = { LOC: {}, PER: {}, ORG: {}, HumanProd: {} } + nems.select { |ne_solr| ne_solr["type_ssi"] == "LOC" }.each do |ne_solr| + output[:LOC][ne_solr["linked_entity_ssi"]] = [] unless output[:LOC].has_key? ne_solr["linked_entity_ssi"] + output[:LOC][ne_solr["linked_entity_ssi"]].append(ne_solr) + end + nems.select { |ne_solr| ne_solr["type_ssi"] == "PER" }.each do |ne_solr| + output[:PER][ne_solr["linked_entity_ssi"]] = [] unless output[:PER].has_key? ne_solr["linked_entity_ssi"] + output[:PER][ne_solr["linked_entity_ssi"]].append(ne_solr) + end + nems.select { |ne_solr| ne_solr["type_ssi"] == "ORG" }.each do |ne_solr| + output[:ORG][ne_solr["linked_entity_ssi"]] = [] unless output[:ORG].has_key? ne_solr["linked_entity_ssi"] + output[:ORG][ne_solr["linked_entity_ssi"]].append(ne_solr) + end + nems.select { |ne_solr| ne_solr["type_ssi"] == "HumanProd" }.each do |ne_solr| + output[:HumanProd][ne_solr["linked_entity_ssi"]] = [] unless output[:HumanProd].has_key? 
ne_solr["linked_entity_ssi"] + output[:HumanProd][ne_solr["linked_entity_ssi"]].append(ne_solr) end -end \ No newline at end of file + output + end +end diff --git a/app/models/notification.rb b/app/models/notification.rb index d2bb227..be54276 100644 --- a/app/models/notification.rb +++ b/app/models/notification.rb @@ -1,6 +1,3 @@ class Notification < ActiveRecord::Base - - belongs_to :user, optional: false - - -end \ No newline at end of file + belongs_to :user, optional: false +end diff --git a/app/models/page.rb b/app/models/page.rb index 87f311c..0ba49fa 100644 --- a/app/models/page.rb +++ b/app/models/page.rb @@ -1,31 +1,30 @@ class Page + attr_accessor :id, :page_number, :width, :height, :mime_type, :iiif_url, :ocr_path, :image_path - attr_accessor :id, :page_number, :width, :height, :mime_type, :iiif_url, :ocr_path, :image_path + def self.from_solr(id) + attrs = SolrSearcher.get_doc_by_id id + p = Page.new + p.id = attrs["id"] + p.page_number = attrs["page_number_isi"] + p.width = attrs["width_isi"] + p.height = attrs["height_isi"] + p.mime_type = attrs["mime_type_ssi"] + p.iiif_url = attrs["iiif_url_ss"] + p.ocr_path = attrs["ocr_path_ss"] + p.image_path = attrs["image_path_ss"] if attrs["image_path_ss"] + p + end - def self.from_solr id - attrs = SolrSearcher.get_doc_by_id id - p = Page.new - p.id = attrs['id'] - p.page_number = attrs['page_number_isi'] - p.width = attrs['width_isi'] - p.height = attrs['height_isi'] - p.mime_type = attrs['mime_type_ssi'] - p.iiif_url = attrs['iiif_url_ss'] - p.ocr_path = attrs['ocr_path_ss'] - p.image_path = attrs['image_path_ss'] if attrs['image_path_ss'] - p - end - - def to_solr - solr_doc = {} - solr_doc['id'] = self.id - solr_doc['has_model_ssim'] = 'PageFileSet' - solr_doc['page_number_isi'] = self.page_number - solr_doc['width_isi'] = self.width - solr_doc['height_isi'] = self.height - solr_doc['mime_type_ssi'] = self.mime_type - solr_doc['iiif_url_ss'] = self.iiif_url - solr_doc['ocr_path_ss'] = self.ocr_path - solr_doc - end -end \ No newline at end of file + def to_solr + solr_doc = {} + solr_doc["id"] = self.id + solr_doc["has_model_ssim"] = "PageFileSet" + solr_doc["page_number_isi"] = self.page_number + solr_doc["width_isi"] = self.width + solr_doc["height_isi"] = self.height + solr_doc["mime_type_ssi"] = self.mime_type + solr_doc["iiif_url_ss"] = self.iiif_url + solr_doc["ocr_path_ss"] = self.ocr_path + solr_doc + end +end diff --git a/app/models/solr_query.rb b/app/models/solr_query.rb index 3e5edd7..80ae868 100644 --- a/app/models/solr_query.rb +++ b/app/models/solr_query.rb @@ -1,54 +1,52 @@ class SolrQuery + attr_accessor :defType, :sort, :start, :rows, :fq, :fl, # common parameters + :q, :q_dot_alt, :qf, :mm, :pf, :ps, :qs, :tie, :bq, :bf, # Dismax parameters + :sow, :mm_dot_autorelax, :boost, :lowercaseOperators, :pf2, :ps2, :pf3, :ps3, :stopwords, :uf, # Edismax parameters + :facet, :facet_dot_field, :facet_dot_threads, + :hl, + :mlt - attr_accessor :defType, :sort, :start, :rows, :fq, :fl, # common parameters - :q, :q_dot_alt, :qf, :mm, :pf, :ps, :qs, :tie, :bq, :bf, # Dismax parameters - :sow, :mm_dot_autorelax, :boost, :lowercaseOperators, :pf2, :ps2, :pf3, :ps3, :stopwords, :uf, # Edismax parameters - :facet, :facet_dot_field, :facet_dot_threads, - :hl, - :mlt + def initialize(search_type) + @defType = "edismax" + @sort = "score desc" + @start = 0 + @rows = 10 + # @fq = ["has_model_ssim:(Article OR Issue)"] + @fq = ["has_model_ssim:(Article)"] + @fl = "*,score" + @q = "*:*" + @q_dot_alt = "*:*" + @qf = 
I18n.t("newspapers.solr_fields").select { |k, v| k.start_with?(search_type == "stemmed" ? "text_stemmed" : "text_exact") }.values # or text_stemmed + @mm = 1 + @pf = "" + @ps = "" + @qs = "" + @tie = 0.1 + @bq = "" + @bf = "" + @hl = true + @hl_dot_fl = @qf - def initialize search_type - @defType = 'edismax' - @sort = 'score desc' - @start = 0 - @rows = 10 - # @fq = ["has_model_ssim:(Article OR Issue)"] - @fq = ["has_model_ssim:(Article)"] - @fl = '*,score' - @q = '*:*' - @q_dot_alt = '*:*' - @qf = I18n.t("newspapers.solr_fields").select{|k,v| k.start_with?( search_type=="stemmed" ? "text_stemmed" : "text_exact") }.values # or text_stemmed - @mm = 1 - @pf = "" - @ps = "" - @qs = "" - @tie = 0.1 - @bq = "" - @bf = "" - @hl = true - @hl_dot_fl = @qf - - @json_dot_facet = {} - I18n.t("newspapers.solr_fields").values_at(:language, :newspaper).each do |f| - @json_dot_facet[f] = { terms: { field: f, limit: 15, numBuckets: true} } - end - I18n.t("newspapers.solr_fields").values_at(:date).each do |f| - @json_dot_facet[f] = { terms: { field: f, limit: -1, numBuckets: true} } - end - I18n.t("newspapers.solr_fields").values_at(:month, :day).each do |f| - @json_dot_facet[f] = { terms: { field: f, limit: 15, numBuckets: true, sort: {index: "asc"}} } - end - I18n.t("newspapers.solr_fields").values_at(:persons, :locations, :organisations, :human_productions).each do |f| - @json_dot_facet[f] = { terms: { field: f, limit: 15, numBuckets: true} } - end - @json_dot_facet["min_date"] = "min(date_created_dtsi)" - @json_dot_facet["max_date"] = "max(date_created_dtsi)" + @json_dot_facet = {} + I18n.t("newspapers.solr_fields").values_at(:language, :newspaper).each do |f| + @json_dot_facet[f] = { terms: { field: f, limit: 15, numBuckets: true } } end - - def to_params - p = self.instance_values.select {|k,v| v != "" and !v.nil?}.transform_keys{|k| k.gsub('_dot_','.')}.with_indifferent_access - p["json.facet"] = p["json.facet"].to_json - p + I18n.t("newspapers.solr_fields").values_at(:date).each do |f| + @json_dot_facet[f] = { terms: { field: f, limit: -1, numBuckets: true } } + end + I18n.t("newspapers.solr_fields").values_at(:month, :day).each do |f| + @json_dot_facet[f] = { terms: { field: f, limit: 15, numBuckets: true, sort: { index: "asc" } } } + end + I18n.t("newspapers.solr_fields").values_at(:persons, :locations, :organisations, :human_productions).each do |f| + @json_dot_facet[f] = { terms: { field: f, limit: 15, numBuckets: true } } end + @json_dot_facet["min_date"] = "min(date_created_dtsi)" + @json_dot_facet["max_date"] = "max(date_created_dtsi)" + end -end \ No newline at end of file + def to_params + p = self.instance_values.select { |k, v| v != "" and !v.nil? 
}.transform_keys { |k| k.gsub("_dot_", ".") }.with_indifferent_access + p["json.facet"] = p["json.facet"].to_json + p + end +end diff --git a/app/models/solr_searcher.rb b/app/models/solr_searcher.rb index e2c4add..0ba1863 100644 --- a/app/models/solr_searcher.rb +++ b/app/models/solr_searcher.rb @@ -1,26 +1,25 @@ class SolrSearcher - include AbstractSearcher + include AbstractSearcher - @@connection = false + @@connection = false - def self.query params - connect unless @@connection - puts "[SolrSearcher.Query] #{params.to_json}\n" if Rails.env == "development" - @@connection.send_and_receive("select", data: params, method: :post) - end + def self.query(params) + connect unless @@connection + puts "[SolrSearcher.Query] #{params.to_json}\n" if Rails.env == "development" + @@connection.send_and_receive("select", data: params, method: :post) + end - def self.connect - @@connection = RSolr.connect(url: Rails.configuration.solr['url']) unless @@connection - end + def self.connect + @@connection = RSolr.connect(url: Rails.configuration.solr["url"]) unless @@connection + end - def self.get_doc_by_id(id) - connect unless @@connection - docs = @@connection.send_and_receive("select", data: {q: "id:#{id}"}, method: :post)['response']['docs'] - if docs.empty? - nil - else - docs[0] - end + def self.get_doc_by_id(id) + connect unless @@connection + docs = @@connection.send_and_receive("select", data: { q: "id:#{id}" }, method: :post)["response"]["docs"] + if docs.empty? + nil + else + docs[0] end - -end \ No newline at end of file + end +end diff --git a/app/models/tool.rb b/app/models/tool.rb index 35b0b6b..599c796 100644 --- a/app/models/tool.rb +++ b/app/models/tool.rb @@ -1,22 +1,20 @@ class Tool < ActiveRecord::Base + belongs_to :experiment, optional: false - belongs_to :experiment, optional: false - - def to_h - { + def to_h + { "tool": { - "id": self.id + "id": self.id, }, - "children": [] + "children": [], } - end - - def runnable? - self.status == "configured" && (self.parent_id.nil? || Tool.find(self.parent_id).status == "finished") - end + end - def run(continue=false) - "#{self.tool_type}_worker".camelize.constantize.perform_async(self.id, self.experiment.user.id, self.experiment.id, self.tool_type, self.parameters, continue) - end + def runnable? + self.status == "configured" && (self.parent_id.nil? || Tool.find(self.parent_id).status == "finished") + end + def run(continue = false) + "#{self.tool_type}_worker".camelize.constantize.perform_async(self.id, self.experiment.user.id, self.experiment.id, self.tool_type, self.parameters, continue) + end end diff --git a/app/models/user.rb b/app/models/user.rb index ae04d79..f02f51b 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -1,28 +1,28 @@ class User < ApplicationRecord - attribute :labs_user_id, presence: true, unique: true + attribute :labs_user_id, presence: true, unique: true - has_many :experiments - has_many :datasets - has_many :notifications - has_many :compound_articles - has_many :active_sessions + has_many :experiments + has_many :datasets + has_many :notifications + has_many :compound_articles + has_many :active_sessions - def datasets_with_doc doc_id - self.datasets.map do |dataset| - [dataset.id, dataset.title] if dataset.contains doc_id.to_s - end.delete_if(&:nil?) - end + def datasets_with_doc(doc_id) + self.datasets.map do |dataset| + [dataset.id, dataset.title] if dataset.contains doc_id.to_s + end.delete_if(&:nil?) 
+ end - def compounds_by_issue - out = {} - self.compound_articles.each do |compound_article| - out[compound_article.issue_id] = [] unless out.has_key? compound_article.issue_id - out[compound_article.issue_id] << compound_article - end - out + def compounds_by_issue + out = {} + self.compound_articles.each do |compound_article| + out[compound_article.issue_id] = [] unless out.has_key? compound_article.issue_id + out[compound_article.issue_id] << compound_article end + out + end - def researcher? - Rails.configuration.auths['emails'].include? self.email - end + def researcher? + Rails.configuration.auths["emails"].include? self.email + end end diff --git a/app/workers/export_dataset_worker.rb b/app/workers/export_dataset_worker.rb index 8dad51b..a0a760b 100644 --- a/app/workers/export_dataset_worker.rb +++ b/app/workers/export_dataset_worker.rb @@ -1,88 +1,88 @@ require "zip" class ExportDatasetWorker - include Sidekiq::Worker - include ActionView::Helpers::FormOptionsHelper + include Sidekiq::Worker + include ActionView::Helpers::FormOptionsHelper - def perform(user_id, dataset_id, export_type) - dataset = Dataset.find(dataset_id) - file = Tempfile.new(["export_#{dataset.title.parameterize(separator: '_')}_", ".#{export_type}"], "tmp") - to_write = [] - named_entities = dataset.named_entities - named_entities = named_entities.values.map{|h| h.values }.flatten - documents = dataset.fetch_paginated_documents(1, 100, "default", "asc", "all", recursive=true) - documents[:docs].map do |doc| - case export_type - when "json" - lang = doc.language - thumb = doc.thumbnail_url - if doc.is_a?(CompoundArticle) - doc_type = "compound" - entities = named_entities.select{ |ne| doc.parts.include? ne['article_id_ssi'] } - elsif doc.is_a?(Article) - doc_type = "article" - entities = named_entities.select{ |ne| doc.id == ne['article_id_ssi'] } - elsif doc.is_a?(Issue) - doc_type = "issue" - entities = named_entities.select{ |ne| doc.id == ne['issue_id_ssi'] } - end - entities = entities.map do |ne| - { - mention: ne['mention_ssi'], - indexStart: ne['article_index_start_isi'], - indexEnd: ne['article_index_end_isi'], - stance: if ne['stance_fsi'] == 0 - "neutral" - else - ne['stance_fsi'] > 0 ? "positive" : "negative" - end, - linked_entity_url: ne['linked_entity_ssi'] == "" || ne['linked_entity_ssi'].nil? ? nil : "https://www.wikidata.org/wiki/#{ne['linked_entity_ssi'].split('_')[-1]}" + def perform(user_id, dataset_id, export_type) + dataset = Dataset.find(dataset_id) + file = Tempfile.new(["export_#{dataset.title.parameterize(separator: "_")}_", ".#{export_type}"], "tmp") + to_write = [] + named_entities = dataset.named_entities + named_entities = named_entities.values.map { |h| h.values }.flatten + documents = dataset.fetch_paginated_documents(1, 100, "default", "asc", "all", recursive = true) + documents[:docs].map do |doc| + case export_type + when "json" + lang = doc.language + thumb = doc.thumbnail_url + if doc.is_a?(CompoundArticle) + doc_type = "compound" + entities = named_entities.select { |ne| doc.parts.include? 
ne["article_id_ssi"] } + elsif doc.is_a?(Article) + doc_type = "article" + entities = named_entities.select { |ne| doc.id == ne["article_id_ssi"] } + elsif doc.is_a?(Issue) + doc_type = "issue" + entities = named_entities.select { |ne| doc.id == ne["issue_id_ssi"] } + end + entities = entities.map do |ne| + { + mention: ne["mention_ssi"], + indexStart: ne["article_index_start_isi"], + indexEnd: ne["article_index_end_isi"], + stance: if ne["stance_fsi"] == 0 + "neutral" + else + ne["stance_fsi"] > 0 ? "positive" : "negative" + end, + linked_entity_url: ne["linked_entity_ssi"] == "" || ne["linked_entity_ssi"].nil? ? nil : "https://www.wikidata.org/wiki/#{ne["linked_entity_ssi"].split("_")[-1]}", } - end - unless under_copyright(lang, doc.date_created, User.find(user_id)) - to_write << { id: doc.id, - type: doc_type, - language: lang, - date: doc.date_created, - newspaper_id: doc.newspaper, - iiif_url: thumb, - text: doc.all_text, - named_entities: entities } - end - when "zip" - filename = "#{doc.date_created}_#{doc.is_a?(CompoundArticle) ? "compound_#{doc.title}" : doc.id}.txt" - file_content = doc.all_text - File.open("/tmp/#{filename}", 'w') do |f| - f.write file_content - end - to_write << filename - end end - case export_type - when "json" - to_write = {"documents": to_write} - file.write to_write.to_json - file.close - when "zip" - Zip::File.open(file.path, Zip::File::CREATE) do |zipfile| - to_write.each do |filename| - zipfile.add filename, "/tmp/#{filename}" if filename - end - end - to_write.each do |filename| - File.delete("/tmp/#{filename}") if filename - end + unless under_copyright(lang, doc.date_created, User.find(user_id)) + to_write << { id: doc.id, + type: doc_type, + language: lang, + date: doc.date_created, + newspaper_id: doc.newspaper, + iiif_url: thumb, + text: doc.all_text, + named_entities: entities } + end + when "zip" + filename = "#{doc.date_created}_#{doc.is_a?(CompoundArticle) ? "compound_#{doc.title}" : doc.id}.txt" + file_content = doc.all_text + File.open("/tmp/#{filename}", "w") do |f| + f.write file_content end - content = "

    Your dataset is ready. Click here to download it.

    " - ActionCable.server.broadcast("notifications.#{user_id}", { - type: "notify", - html: ApplicationController.render(partial: "shared/notification", locals: {notif_title: dataset.title, notif_content: content}) - }) + to_write << filename + end end - - def under_copyright(lang, date, user) - nlf_doc = ["fi", "se"].include? lang - nlf_under_copyright = Date.parse("1910-12-31T00:00:00Z") <= Date.parse(date) - nlf_doc and nlf_under_copyright and !user.groups.include? "researcher" + case export_type + when "json" + to_write = { "documents": to_write } + file.write to_write.to_json + file.close + when "zip" + Zip::File.open(file.path, Zip::File::CREATE) do |zipfile| + to_write.each do |filename| + zipfile.add filename, "/tmp/#{filename}" if filename + end + end + to_write.each do |filename| + File.delete("/tmp/#{filename}") if filename + end end -end \ No newline at end of file + content = "

    Your dataset is ready. Click here to download it.

    " + ActionCable.server.broadcast("notifications.#{user_id}", { + type: "notify", + html: ApplicationController.render(partial: "shared/notification", locals: { notif_title: dataset.title, notif_content: content }), + }) + end + + def under_copyright(lang, date, user) + nlf_doc = ["fi", "se"].include? lang + nlf_under_copyright = Date.parse("1910-12-31T00:00:00Z") <= Date.parse(date) + nlf_doc and nlf_under_copyright and !user.groups.include? "researcher" + end +end diff --git a/app/workers/ngrams_worker.rb b/app/workers/ngrams_worker.rb index bc3dbee..348268e 100644 --- a/app/workers/ngrams_worker.rb +++ b/app/workers/ngrams_worker.rb @@ -1,58 +1,59 @@ class NgramsWorker - include Sidekiq::Worker + include Sidekiq::Worker - def perform(tool_id, user_id, experiment_id, tool_type, tool_parameters, continue=false) - tool = Tool.find(tool_id) - tool.status = "running" - tool.save! - ActionCable.server.broadcast("notifications.#{user_id}", { - type: "refresh_display", - html: ApplicationController.render(partial: "experiment/tree", locals: {experiment: Experiment.find(tool.experiment.id)}), - message: 'Starting job...' }) - parent_output = Tool.find(tool.parent_id).results - docs = parent_output["docs"].map{ |doc| doc['text'] } - n = tool_parameters.select{|t| t['name'] == 'n'}[0]['value'].to_i - min_freq = tool_parameters.select{|t| t['name'] == 'minimum_frequency'}[0]['value'].to_i - ngrams = find_ngrams(tool_id, experiment_id, user_id, docs, n, min_freq) - tool.results = {type:"ngrams", ngrams: ngrams} - tool.status = "finished" - tool.save! - experiment = Experiment.find(tool.experiment.id) - out = { - type: "refresh_display", - html: ApplicationController.render(partial: "experiment/tree", locals: {experiment: experiment}), - message: 'Done.' - } - ActionCable.server.broadcast("notifications.#{user_id}", out) - if continue - experiment.continue_from(tool_id) - end - if experiment.finished? - out = { - type: "experiment_finished", - message: 'Experiment has finished running.' - } - ActionCable.server.broadcast("notifications.#{user_id}", out) - end + def perform(tool_id, user_id, experiment_id, tool_type, tool_parameters, continue = false) + tool = Tool.find(tool_id) + tool.status = "running" + tool.save! + ActionCable.server.broadcast("notifications.#{user_id}", { + type: "refresh_display", + html: ApplicationController.render(partial: "experiment/tree", locals: { experiment: Experiment.find(tool.experiment.id) }), + message: "Starting job...", + }) + parent_output = Tool.find(tool.parent_id).results + docs = parent_output["docs"].map { |doc| doc["text"] } + n = tool_parameters.select { |t| t["name"] == "n" }[0]["value"].to_i + min_freq = tool_parameters.select { |t| t["name"] == "minimum_frequency" }[0]["value"].to_i + ngrams = find_ngrams(tool_id, experiment_id, user_id, docs, n, min_freq) + tool.results = { type: "ngrams", ngrams: ngrams } + tool.status = "finished" + tool.save! + experiment = Experiment.find(tool.experiment.id) + out = { + type: "refresh_display", + html: ApplicationController.render(partial: "experiment/tree", locals: { experiment: experiment }), + message: "Done.", + } + ActionCable.server.broadcast("notifications.#{user_id}", out) + if continue + experiment.continue_from(tool_id) end + if experiment.finished? 
+ out = { + type: "experiment_finished", + message: "Experiment has finished running.", + } + ActionCable.server.broadcast("notifications.#{user_id}", out) + end + end - def find_ngrams(tool_id, experiment_id, user_id, documents, n, minimum_frequency) - total = {} - documents.each_with_index do |document, idx| - out = { - type: "completion_rate", - tool_id: tool_id, - experiment_id: experiment_id, - completion: ((idx/(documents.size).to_f)*100).to_i - } - ActionCable.server.broadcast("notifications.#{user_id}", out) if idx%20 == 0 - ngrams = document.split.each_cons(n).to_a - ngrams.reject! { |w1, w2| w1 !~ /^\w+/ || w2 !~ /^\w+/ } - ngrams.map!{ |ngram| ngram.join(' ') } - total.merge!( ngrams.each_with_object(Hash.new(0)) do |word, obj| - obj[word.downcase] += 1 - end) - end - total.sort_by { |k, v| -v }.reject { |k, v| v < minimum_frequency } + def find_ngrams(tool_id, experiment_id, user_id, documents, n, minimum_frequency) + total = {} + documents.each_with_index do |document, idx| + out = { + type: "completion_rate", + tool_id: tool_id, + experiment_id: experiment_id, + completion: ((idx / (documents.size).to_f) * 100).to_i, + } + ActionCable.server.broadcast("notifications.#{user_id}", out) if idx % 20 == 0 + ngrams = document.split.each_cons(n).to_a + ngrams.reject! { |w1, w2| w1 !~ /^\w+/ || w2 !~ /^\w+/ } + ngrams.map! { |ngram| ngram.join(" ") } + total.merge!(ngrams.each_with_object(Hash.new(0)) do |word, obj| + obj[word.downcase] += 1 + end) end + total.sort_by { |k, v| -v }.reject { |k, v| v < minimum_frequency } + end end diff --git a/app/workers/preprocess_worker.rb b/app/workers/preprocess_worker.rb index f634334..ee740c1 100644 --- a/app/workers/preprocess_worker.rb +++ b/app/workers/preprocess_worker.rb @@ -1,55 +1,56 @@ class PreprocessWorker - include Sidekiq::Worker + include Sidekiq::Worker - def perform(tool_id, user_id, experiment_id, tool_type, tool_parameters, continue=false) - tool = Tool.find(tool_id) - tool.status = "running" - tool.save! - ActionCable.server.broadcast("notifications.#{user_id}", { - type: "refresh_display", - html: ApplicationController.render(partial: "experiment/tree", locals: {experiment: Experiment.find(tool.experiment.id)}), - message: 'Starting job...' }) - parent_output = Tool.find(tool.parent_id).results - docs = parent_output["docs"] - docs = docs.each_with_index.map do |doc, idx| - out = { - type: "completion_rate", - tool_id: tool.id, - experiment_id: experiment_id, - completion: ((idx/(docs.size).to_f)*100).to_i - } - ActionCable.server.broadcast("notifications.#{user_id}", out) if idx%20 == 0 + def perform(tool_id, user_id, experiment_id, tool_type, tool_parameters, continue = false) + tool = Tool.find(tool_id) + tool.status = "running" + tool.save! 
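+    # Re-render the experiment tree so the UI shows this tool as running before tokenization starts.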
+ ActionCable.server.broadcast("notifications.#{user_id}", { + type: "refresh_display", + html: ApplicationController.render(partial: "experiment/tree", locals: { experiment: Experiment.find(tool.experiment.id) }), + message: "Starting job...", + }) + parent_output = Tool.find(tool.parent_id).results + docs = parent_output["docs"] + docs = docs.each_with_index.map do |doc, idx| + out = { + type: "completion_rate", + tool_id: tool.id, + experiment_id: experiment_id, + completion: ((idx / (docs.size).to_f) * 100).to_i, + } + ActionCable.server.broadcast("notifications.#{user_id}", out) if idx % 20 == 0 - doc['text'] = PragmaticTokenizer::Tokenizer.new( - language: doc['language'], - remove_stop_words: tool_parameters.select{|t| t['name'] == 'stopwords'}[0]['value'], - punctuation: tool_parameters.select{|t| t['name'] == 'punctuation'}[0]['value'] ? "none" : "all", - numbers: tool_parameters.select{|t| t['name'] == 'lowercase'}[0]['value'] ? "none" : "all", - clean: true, - downcase: tool_parameters.select{|t| t['name'] == 'lowercase'}[0]['value'], - minimum_length: 3 - ).tokenize(doc['text']).join(' ') - doc - end - tool.results = {type:"documents", docs: docs} - tool.status = "finished" - tool.save! - experiment = Experiment.find(tool.experiment.id) - out = { - type: "refresh_display", - html: ApplicationController.render(partial: "experiment/tree", locals: {experiment: experiment}), - message: 'Done.' - } - ActionCable.server.broadcast("notifications.#{user_id}", out) - if continue - experiment.continue_from(tool_id) - end - if experiment.finished? - out = { - type: "experiment_finished", - message: 'Experiment has finished running.' - } - ActionCable.server.broadcast("notifications.#{user_id}", out) - end + doc["text"] = PragmaticTokenizer::Tokenizer.new( + language: doc["language"], + remove_stop_words: tool_parameters.select { |t| t["name"] == "stopwords" }[0]["value"], + punctuation: tool_parameters.select { |t| t["name"] == "punctuation" }[0]["value"] ? "none" : "all", + numbers: tool_parameters.select { |t| t["name"] == "lowercase" }[0]["value"] ? "none" : "all", + clean: true, + downcase: tool_parameters.select { |t| t["name"] == "lowercase" }[0]["value"], + minimum_length: 3, + ).tokenize(doc["text"]).join(" ") + doc end + tool.results = { type: "documents", docs: docs } + tool.status = "finished" + tool.save! + experiment = Experiment.find(tool.experiment.id) + out = { + type: "refresh_display", + html: ApplicationController.render(partial: "experiment/tree", locals: { experiment: experiment }), + message: "Done.", + } + ActionCable.server.broadcast("notifications.#{user_id}", out) + if continue + experiment.continue_from(tool_id) + end + if experiment.finished? 
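+      # Signal that every tool in this experiment has now finished running.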
+ out = { + type: "experiment_finished", + message: "Experiment has finished running.", + } + ActionCable.server.broadcast("notifications.#{user_id}", out) + end + end end diff --git a/app/workers/search_to_dataset_worker.rb b/app/workers/search_to_dataset_worker.rb index ce1da67..2315294 100644 --- a/app/workers/search_to_dataset_worker.rb +++ b/app/workers/search_to_dataset_worker.rb @@ -1,34 +1,34 @@ class SearchToDatasetWorker - include Sidekiq::Worker - include ActionView::Helpers::FormOptionsHelper + include Sidekiq::Worker + include ActionView::Helpers::FormOptionsHelper - def perform(user_id, dataset_id, search_params) - puts "### #{search_params}" - dataset = Dataset.find(dataset_id) - search_params['fl'] = 'id' - search_params['facet'] = false - search_params['rows'] = 100 - search_params['start'] = 0 - doc_ids = [] - res = SolrSearcher.query search_params - numFound = res['response']['numFound'] - doc_ids.concat res['response']['docs'].map{|d| d['id']} - while(doc_ids.size < numFound) - search_params['start'] += 100 - res = SolrSearcher.query search_params - numFound = res['response']['numFound'] - doc_ids.concat res['response']['docs'].map{|d| d['id']} - end - existing = dataset.add_documents doc_ids - nb_docs_added = doc_ids.size - existing.size - content = "

    #{nb_docs_added} document#{nb_docs_added > 1 ? "s were" : " was"} added to your dataset \"#{dataset.title}\"

    " - content.concat "

    #{existing.size} document#{existing.size > 1 ? "s" : ""} already exist in this dataset.

    " unless existing.empty? - # TODO: next line may cause bugs with the working dataset - dataset_options = options_for_select(User.find(user_id).datasets.map{|d| ["#{d.title} (#{d.documents.size} docs)", d.id]}) - ActionCable.server.broadcast("notifications.#{user_id}", { - type: "notify", - html: ApplicationController.render(partial: "shared/notification", locals: {notif_title: dataset.title, notif_content: content}), - dataset_options: dataset_options }) + def perform(user_id, dataset_id, search_params) + puts "### #{search_params}" + dataset = Dataset.find(dataset_id) + search_params["fl"] = "id" + search_params["facet"] = false + search_params["rows"] = 100 + search_params["start"] = 0 + doc_ids = [] + res = SolrSearcher.query search_params + numFound = res["response"]["numFound"] + doc_ids.concat res["response"]["docs"].map { |d| d["id"] } + while (doc_ids.size < numFound) + search_params["start"] += 100 + res = SolrSearcher.query search_params + numFound = res["response"]["numFound"] + doc_ids.concat res["response"]["docs"].map { |d| d["id"] } end - -end \ No newline at end of file + existing = dataset.add_documents doc_ids + nb_docs_added = doc_ids.size - existing.size + content = "

    #{nb_docs_added} document#{nb_docs_added == 1 ? " was" : "s were"} added to your dataset \"#{dataset.title}\"

    " + content.concat "

    #{existing.size} document#{existing.size == 1 ? " already exists" : "s already exist"} in this dataset.

    " unless existing.empty? + # TODO: next line may cause bugs with the working dataset + dataset_options = options_for_select(User.find(user_id).datasets.map { |d| ["#{d.title} (#{d.documents.size} docs)", d.id] }) + ActionCable.server.broadcast("notifications.#{user_id}", { + type: "notify", + html: ApplicationController.render(partial: "shared/notification", locals: { notif_title: dataset.title, notif_content: content }), + dataset_options: dataset_options, + }) + end +end diff --git a/app/workers/source_dataset_worker.rb b/app/workers/source_dataset_worker.rb index bda0ec4..d303b8f 100644 --- a/app/workers/source_dataset_worker.rb +++ b/app/workers/source_dataset_worker.rb @@ -1,62 +1,63 @@ class SourceDatasetWorker - include Sidekiq::Worker + include Sidekiq::Worker - def perform(tool_id, user_id, experiment_id, tool_type, tool_parameters, continue=false) - tool = Tool.find(tool_id) - tool.status = "running" - tool.save! - ActionCable.server.broadcast("notifications.#{user_id}", { - type: "refresh_display", - html: ApplicationController.render(partial: "experiment/tree", locals: {experiment: Experiment.find(tool.experiment.id)}), - message: 'Starting job...' }) - docs = fetch_docs_from_dataset(tool_id, experiment_id, user_id, tool_parameters.select{|t| t['name'] == 'dataset'}[0]['value']) - tool.results = {type:"documents", docs: docs} - tool.status = "finished" - tool.save! - experiment = Experiment.find(tool.experiment.id) - out = { - type: "refresh_display", - html: ApplicationController.render(partial: "experiment/tree", locals: {experiment: experiment}), - message: 'Done.' - } - ActionCable.server.broadcast("notifications.#{user_id}", out) - if continue - experiment.continue_from(tool_id) - end - if experiment.finished? - out = { - type: "experiment_finished", - message: 'Experiment has finished running.' - } - ActionCable.server.broadcast("notifications.#{user_id}", out) - end + def perform(tool_id, user_id, experiment_id, tool_type, tool_parameters, continue = false) + tool = Tool.find(tool_id) + tool.status = "running" + tool.save! + ActionCable.server.broadcast("notifications.#{user_id}", { + type: "refresh_display", + html: ApplicationController.render(partial: "experiment/tree", locals: { experiment: Experiment.find(tool.experiment.id) }), + message: "Starting job...", + }) + docs = fetch_docs_from_dataset(tool_id, experiment_id, user_id, tool_parameters.select { |t| t["name"] == "dataset" }[0]["value"]) + tool.results = { type: "documents", docs: docs } + tool.status = "finished" + tool.save! + experiment = Experiment.find(tool.experiment.id) + out = { + type: "refresh_display", + html: ApplicationController.render(partial: "experiment/tree", locals: { experiment: experiment }), + message: "Done.", + } + ActionCable.server.broadcast("notifications.#{user_id}", out) + if continue + experiment.continue_from(tool_id) + end + if experiment.finished? + out = { + type: "experiment_finished", + message: "Experiment has finished running.", + } + ActionCable.server.broadcast("notifications.#{user_id}", out) end + end - def fetch_docs_from_dataset(tool_id, experiment_id, user_id, dataset_id) - d = Dataset.find(dataset_id) - all_docs = [] - docs = [] - page = 0 - while page == 0 or docs.size == 100 - page += 1 - docs = d.fetch_paginated_documents(page, 100, "default", "asc", "all")[:docs] - all_docs.concat docs - out = { - type: "completion_rate", - tool_id: tool_id, - experiment_id: experiment_id, - completion: d.documents.size == 0 ? 
0 : ((all_docs.size.to_f/d.documents.size)*100).to_i - } - ActionCable.server.broadcast("notifications.#{user_id}", out) - end - all_docs.map do |doc| - { + def fetch_docs_from_dataset(tool_id, experiment_id, user_id, dataset_id) + d = Dataset.find(dataset_id) + all_docs = [] + docs = [] + page = 0 + while page == 0 or docs.size == 100 + page += 1 + docs = d.fetch_paginated_documents(page, 100, "default", "asc", "all")[:docs] + all_docs.concat docs + out = { + type: "completion_rate", + tool_id: tool_id, + experiment_id: experiment_id, + completion: d.documents.size == 0 ? 0 : ((all_docs.size.to_f / d.documents.size) * 100).to_i, + } + ActionCable.server.broadcast("notifications.#{user_id}", out) + end + all_docs.map do |doc| + { id: doc.id, newspaper: doc.newspaper, language: doc.language, text: doc.all_text, - date: doc.date_created + date: doc.date_created, } - end end + end end diff --git a/app/workers/splitter_worker.rb b/app/workers/splitter_worker.rb index bb90f61..a6444d9 100644 --- a/app/workers/splitter_worker.rb +++ b/app/workers/splitter_worker.rb @@ -1,8 +1,6 @@ class SplitterWorker - include Sidekiq::Worker - - def perform(tool_id, user_id, experiment_id, tool_type, tool_parameters) - - end + include Sidekiq::Worker + def perform(tool_id, user_id, experiment_id, tool_type, tool_parameters) + end end diff --git a/config/application.rb b/config/application.rb index 2755819..7eaa9a2 100644 --- a/config/application.rb +++ b/config/application.rb @@ -7,22 +7,33 @@ require "rails/all" Bundler.require(*Rails.groups) module NewspapersPlatform - class Application < Rails::Application - # Initialize configuration defaults for originally generated Rails version. - config.load_defaults 6.1 + class Application < Rails::Application + # Initialize configuration defaults for originally generated Rails version. + config.load_defaults 6.1 - # Configuration for the application, engines, and railties goes here. - # - # These settings can be overridden in specific environments using the files - # in config/environments, which are processed later. - # - # config.time_zone = "Central Time (US & Canada)" - # config.eager_load_paths << Rails.root.join("extras") - config.solr = config_for('solr') - if File.exist? "#{Rails.root}/config/auths.yml" - config.auths = config_for('auths') - end - # config.active_job.queue_adapter = :sidekiq - config.iiif_sources = config_for("iiif_sources") + # Configuration for the application, engines, and railties goes here. + # + # These settings can be overridden in specific environments using the files + # in config/environments, which are processed later. + # + # config.time_zone = "Central Time (US & Canada)" + # config.eager_load_paths << Rails.root.join("extras") + config.solr = config_for("solr") + if File.exist? "#{Rails.root}/config/auths.yml" + config.auths = config_for("auths") end + # config.active_job.queue_adapter = :sidekiq + config.iiif_sources = config_for("iiif_sources") + + # Allow requests from different origins + config.middleware.use Rack::Cors do + allow do + origins "*" + resource "*", + headers: :any, + expose: %w(access-token expiry token-type uid client), + methods: %i(post) + end + end + end end diff --git a/config/boot.rb b/config/boot.rb index 3cda23b..988a5dd 100644 --- a/config/boot.rb +++ b/config/boot.rb @@ -1,4 +1,4 @@ -ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __dir__) +ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__) require "bundler/setup" # Set up gems listed in the Gemfile. 
require "bootsnap/setup" # Speed up boot time by caching expensive operations. diff --git a/config/environments/production.rb b/config/environments/production.rb index 2a69174..b67d741 100644 --- a/config/environments/production.rb +++ b/config/environments/production.rb @@ -13,7 +13,7 @@ Rails.application.configure do config.eager_load = true # Full error reports are disabled and caching is turned on. - config.consider_all_requests_local = false + config.consider_all_requests_local = false config.action_controller.perform_caching = true # Ensures that a master key has been made available in either ENV["RAILS_MASTER_KEY"] @@ -22,7 +22,7 @@ Rails.application.configure do # Disable serving static files from the `/public` folder by default since # Apache or NGINX already handles this. - config.public_file_server.enabled = ENV['RAILS_SERVE_STATIC_FILES'].present? + config.public_file_server.enabled = ENV["RAILS_SERVE_STATIC_FILES"].present? # Compress CSS using a preprocessor. # config.assets.css_compressor = :sass @@ -53,7 +53,7 @@ Rails.application.configure do config.log_level = :info # Prepend all log lines with the following tags. - config.log_tags = [ :request_id ] + config.log_tags = [:request_id] # Use a different cache store in production. # config.cache_store = :mem_cache_store @@ -89,9 +89,9 @@ Rails.application.configure do # config.logger = ActiveSupport::TaggedLogging.new(Syslog::Logger.new 'app-name') if ENV["RAILS_LOG_TO_STDOUT"].present? - logger = ActiveSupport::Logger.new(STDOUT) + logger = ActiveSupport::Logger.new(STDOUT) logger.formatter = config.log_formatter - config.logger = ActiveSupport::TaggedLogging.new(logger) + config.logger = ActiveSupport::TaggedLogging.new(logger) end # Do not dump schema after migrations. diff --git a/config/environments/test.rb b/config/environments/test.rb index 93ed4f1..4fbe458 100644 --- a/config/environments/test.rb +++ b/config/environments/test.rb @@ -19,11 +19,11 @@ Rails.application.configure do # Configure public file server for tests with Cache-Control for performance. config.public_file_server.enabled = true config.public_file_server.headers = { - 'Cache-Control' => "public, max-age=#{1.hour.to_i}" + "Cache-Control" => "public, max-age=#{1.hour.to_i}", } # Show full error reports and disable caching. - config.consider_all_requests_local = true + config.consider_all_requests_local = true config.action_controller.perform_caching = false config.cache_store = :null_store diff --git a/config/initializers/assets.rb b/config/initializers/assets.rb index 4b828e8..c1f948d 100644 --- a/config/initializers/assets.rb +++ b/config/initializers/assets.rb @@ -1,12 +1,12 @@ # Be sure to restart your server when you modify this file. # Version of your assets, change this if you want to expire all your assets. -Rails.application.config.assets.version = '1.0' +Rails.application.config.assets.version = "1.0" # Add additional assets to the asset load path. # Rails.application.config.assets.paths << Emoji.images_path # Add Yarn node_modules folder to the asset load path. -Rails.application.config.assets.paths << Rails.root.join('node_modules') +Rails.application.config.assets.paths << Rails.root.join("node_modules") # Precompile additional assets. 
diff --git a/config/boot.rb b/config/boot.rb
index 3cda23b..988a5dd 100644
--- a/config/boot.rb
+++ b/config/boot.rb
@@ -1,4 +1,4 @@
-ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __dir__)
+ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__)
 
 require "bundler/setup" # Set up gems listed in the Gemfile.
 require "bootsnap/setup" # Speed up boot time by caching expensive operations.
diff --git a/config/environments/production.rb b/config/environments/production.rb
index 2a69174..b67d741 100644
--- a/config/environments/production.rb
+++ b/config/environments/production.rb
@@ -13,7 +13,7 @@ Rails.application.configure do
   config.eager_load = true
 
   # Full error reports are disabled and caching is turned on.
-  config.consider_all_requests_local       = false
+  config.consider_all_requests_local = false
   config.action_controller.perform_caching = true
 
   # Ensures that a master key has been made available in either ENV["RAILS_MASTER_KEY"]
@@ -22,7 +22,7 @@ Rails.application.configure do
 
   # Disable serving static files from the `/public` folder by default since
   # Apache or NGINX already handles this.
-  config.public_file_server.enabled = ENV['RAILS_SERVE_STATIC_FILES'].present?
+  config.public_file_server.enabled = ENV["RAILS_SERVE_STATIC_FILES"].present?
 
   # Compress CSS using a preprocessor.
   # config.assets.css_compressor = :sass
@@ -53,7 +53,7 @@ Rails.application.configure do
   config.log_level = :info
 
   # Prepend all log lines with the following tags.
-  config.log_tags = [ :request_id ]
+  config.log_tags = [:request_id]
 
   # Use a different cache store in production.
   # config.cache_store = :mem_cache_store
@@ -89,9 +89,9 @@ Rails.application.configure do
   # config.logger = ActiveSupport::TaggedLogging.new(Syslog::Logger.new 'app-name')
 
   if ENV["RAILS_LOG_TO_STDOUT"].present?
-    logger           = ActiveSupport::Logger.new(STDOUT)
+    logger = ActiveSupport::Logger.new(STDOUT)
     logger.formatter = config.log_formatter
-    config.logger    = ActiveSupport::TaggedLogging.new(logger)
+    config.logger = ActiveSupport::TaggedLogging.new(logger)
   end
 
   # Do not dump schema after migrations.
diff --git a/config/environments/test.rb b/config/environments/test.rb
index 93ed4f1..4fbe458 100644
--- a/config/environments/test.rb
+++ b/config/environments/test.rb
@@ -19,11 +19,11 @@ Rails.application.configure do
   # Configure public file server for tests with Cache-Control for performance.
   config.public_file_server.enabled = true
   config.public_file_server.headers = {
-    'Cache-Control' => "public, max-age=#{1.hour.to_i}"
+    "Cache-Control" => "public, max-age=#{1.hour.to_i}",
   }
 
   # Show full error reports and disable caching.
-  config.consider_all_requests_local       = true
+  config.consider_all_requests_local = true
   config.action_controller.perform_caching = false
   config.cache_store = :null_store
diff --git a/config/initializers/assets.rb b/config/initializers/assets.rb
index 4b828e8..c1f948d 100644
--- a/config/initializers/assets.rb
+++ b/config/initializers/assets.rb
@@ -1,12 +1,12 @@
 # Be sure to restart your server when you modify this file.
 
 # Version of your assets, change this if you want to expire all your assets.
-Rails.application.config.assets.version = '1.0'
+Rails.application.config.assets.version = "1.0"
 
 # Add additional assets to the asset load path.
 # Rails.application.config.assets.paths << Emoji.images_path
 
 # Add Yarn node_modules folder to the asset load path.
-Rails.application.config.assets.paths << Rails.root.join('node_modules')
+Rails.application.config.assets.paths << Rails.root.join("node_modules")
 
 # Precompile additional assets.
 # application.js, application.css, and all non-JS/CSS in the app/assets
diff --git a/config/initializers/filter_parameter_logging.rb b/config/initializers/filter_parameter_logging.rb
index 4b34a03..0af68a6 100644
--- a/config/initializers/filter_parameter_logging.rb
+++ b/config/initializers/filter_parameter_logging.rb
@@ -2,5 +2,5 @@
 
 # Configure sensitive parameters which will be filtered from the log file.
 Rails.application.config.filter_parameters += [
-  :passw, :secret, :token, :_key, :crypt, :salt, :certificate, :otp, :ssn
+  :passw, :secret, :token, :_key, :crypt, :salt, :certificate, :otp, :ssn,
 ]
diff --git a/config/initializers/session_store.rb b/config/initializers/session_store.rb
index df3891e..38f5f69 100644
--- a/config/initializers/session_store.rb
+++ b/config/initializers/session_store.rb
@@ -1,2 +1,2 @@
-Rails.application.config.session_store :active_record_store, :key => '_web_session'
-# Run this regularly to clean up DB: 'rails db:sessions:trim'
\ No newline at end of file
+Rails.application.config.session_store :active_record_store, :key => "_web_session"
+# Run this regularly to clean up DB: 'rails db:sessions:trim'
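The comment kept in `session_store.rb` asks for `rails db:sessions:trim` (the cleanup task shipped with activerecord-session_store) to be run regularly. One way to automate that, assuming the `whenever` gem were added (this patch does not add it), is a schedule along these lines:

    # config/schedule.rb -- a sketch; requires the `whenever` gem
    every 1.day, at: "4:30 am" do
      rake "db:sessions:trim"  # drop expired rows from the sessions table
    end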
"dataset#paginate" - post '/dataset/create', to: 'dataset#create_dataset' - post '/dataset/rename', to: 'dataset#rename_dataset' - post '/dataset/import', to: 'dataset#import_dataset' - post '/dataset/delete', to: 'dataset#delete_dataset' + get "/datasets", to: "dataset#index" + get "/datasets/update", to: "dataset#update_datasets_list" + post "/datasets/working_dataset", to: "dataset#set_working_dataset" + post "/datasets/add_selected_documents", to: "dataset#add_selected_documents" + post "/datasets/add_compound", to: "dataset#add_compound" + post "/datasets/remove_selected_documents", to: "dataset#remove_selected_documents" + post "/datasets/add_all_documents", to: "dataset#add_all_documents" + post "/datasets/export_dataset", to: "dataset#export_dataset" + get "/datasets/list", to: "dataset#list_datasets" + get "/dataset/:id", to: "dataset#show" + post "/dataset/toggle_sharing_status", to: "dataset#toggle_sharing_status" + post "/dataset/:id/paginate", to: "dataset#paginate" + post "/dataset/create", to: "dataset#create_dataset" + post "/dataset/rename", to: "dataset#rename_dataset" + post "/dataset/import", to: "dataset#import_dataset" + post "/dataset/delete", to: "dataset#delete_dataset" - get '/experiments', to: 'experiment#index' - get '/experiments/update', to: 'experiment#update_experiments_list' - post '/experiment/create', to: 'experiment#create' - post '/experiment/delete', to: 'experiment#delete' - post '/experiment/run', to: 'experiment#run_experiment' - get '/experiment/:id', to: "experiment#show" - get '/experiment/:id/load', to: "experiment#load" - post '/experiment/:id/save', to: "experiment#save" - post '/experiment/:id/add_tool', to: "experiment#add_tool" - post '/experiment/:id/delete_tool', to: "experiment#delete_tool" - post '/experiment/:id/edit_tool', to: "experiment#edit_tool" - post '/experiment/:id/edit_tool_form', to: "experiment#edit_tool_form" - post '/experiment/:id/tool_results', to: "experiment#tool_results" - post '/experiment/:id/run_tool', to: "experiment#run_tool" - post '/experiment/:id/run_experiment', to: "experiment#run_experiment" + get "/experiments", to: "experiment#index" + get "/experiments/update", to: "experiment#update_experiments_list" + post "/experiment/create", to: "experiment#create" + post "/experiment/delete", to: "experiment#delete" + post "/experiment/run", to: "experiment#run_experiment" + get "/experiment/:id", to: "experiment#show" + get "/experiment/:id/load", to: "experiment#load" + post "/experiment/:id/save", to: "experiment#save" + post "/experiment/:id/add_tool", to: "experiment#add_tool" + post "/experiment/:id/delete_tool", to: "experiment#delete_tool" + post "/experiment/:id/edit_tool", to: "experiment#edit_tool" + post "/experiment/:id/edit_tool_form", to: "experiment#edit_tool_form" + post "/experiment/:id/tool_results", to: "experiment#tool_results" + post "/experiment/:id/run_tool", to: "experiment#run_tool" + post "/experiment/:id/run_experiment", to: "experiment#run_experiment" - resources :tool, only: [:show, :create, :update, :destroy] + resources :tool, only: [:show, :create, :update, :destroy] - mount ActionCable.server => '/cable' - if Rails.env.development? - mount Sidekiq::Web => '/sidekiq' - end + mount ActionCable.server => "/cable" + if Rails.env.development? 
diff --git a/db/migrate/20210721081005_create_users.rb b/db/migrate/20210721081005_create_users.rb
index 46e5b17..d2be7d5 100644
--- a/db/migrate/20210721081005_create_users.rb
+++ b/db/migrate/20210721081005_create_users.rb
@@ -2,7 +2,7 @@ class CreateUsers < ActiveRecord::Migration[6.1]
   def change
     create_table :users do |t|
       t.string :labs_user_id, null: false, unique: true
-      
+
       t.timestamps null: false
     end
diff --git a/db/migrate/20210903194218_create_experiment.rb b/db/migrate/20210903194218_create_experiment.rb
index 8c0761e..3dc1d36 100644
--- a/db/migrate/20210903194218_create_experiment.rb
+++ b/db/migrate/20210903194218_create_experiment.rb
@@ -1,11 +1,11 @@
 class CreateExperiment < ActiveRecord::Migration[6.1]
-    def change
-        create_table :experiments do |t|
-            t.string :title
-            t.references :user, foreign_key: true
-            t.jsonb :description, default: {children:[]}
-            t.timestamps
-        end
-        add_index :experiments, [:title, :user_id], unique: true
+  def change
+    create_table :experiments do |t|
+      t.string :title
+      t.references :user, foreign_key: true
+      t.jsonb :description, default: { children: [] }
+      t.timestamps
+    end
+    add_index :experiments, [:title, :user_id], unique: true
+  end
 end
diff --git a/db/migrate/20210909142841_create_datasets.rb b/db/migrate/20210909142841_create_datasets.rb
index 2e54da3..44dfd60 100644
--- a/db/migrate/20210909142841_create_datasets.rb
+++ b/db/migrate/20210909142841_create_datasets.rb
@@ -1,12 +1,12 @@
 class CreateDatasets < ActiveRecord::Migration[6.1]
-    def change
-        create_table :datasets do |t|
-            t.string :title
-            t.references :user, foreign_key: true
-            t.jsonb :documents, null: false, default: []
-            t.boolean :public, default: false
-            t.timestamps
-        end
-        add_index :datasets, [:title, :user_id], unique: true
+  def change
+    create_table :datasets do |t|
+      t.string :title
+      t.references :user, foreign_key: true
+      t.jsonb :documents, null: false, default: []
+      t.boolean :public, default: false
+      t.timestamps
+    end
+    add_index :datasets, [:title, :user_id], unique: true
+  end
 end
diff --git a/db/migrate/20210915140752_add_tool.rb b/db/migrate/20210915140752_add_tool.rb
index b0363d3..2705fb6 100644
--- a/db/migrate/20210915140752_add_tool.rb
+++ b/db/migrate/20210915140752_add_tool.rb
@@ -1,15 +1,15 @@
 class AddTool < ActiveRecord::Migration[6.1]
   def change
-      create_table :tools do |t|
-          t.references :experiment, foreign_key: true
-          t.references :parent, foreign_key: {to_table: :tools}
-          t.string :tool_type
-          t.string :input_type
-          t.string :output_type
-          t.jsonb :parameters, default: {}
-          t.jsonb :results, default: {}
-          t.string :status, default: "created"
-          t.timestamps
-      end
+    create_table :tools do |t|
+      t.references :experiment, foreign_key: true
+      t.references :parent, foreign_key: { to_table: :tools }
+      t.string :tool_type
+      t.string :input_type
+      t.string :output_type
+      t.jsonb :parameters, default: {}
+      t.jsonb :results, default: {}
+      t.string :status, default: "created"
+      t.timestamps
+    end
   end
 end
diff --git a/db/migrate/20211110092535_add_notifications_table.rb b/db/migrate/20211110092535_add_notifications_table.rb
index 2b06ba8..9be6378 100644
--- a/db/migrate/20211110092535_add_notifications_table.rb
+++ b/db/migrate/20211110092535_add_notifications_table.rb
@@ -1,10 +1,10 @@
 class AddNotificationsTable < ActiveRecord::Migration[6.1]
   def change
-      create_table :notifications do |t|
-          t.references :user, foreign_key: true
-          t.string :content
-          t.boolean :read, default: false
-          t.timestamps
-      end
+    create_table :notifications do |t|
+      t.references :user, foreign_key: true
+      t.string :content
+      t.boolean :read, default: false
+      t.timestamps
+    end
   end
 end
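Two details in these migrations are easy to miss. First, the `unique: true` inside `t.string :labs_user_id, null: false, unique: true` is, as far as we can tell, not a column option Rails acts on in a migration; real uniqueness for `labs_user_id` would need its own index, unlike the experiments/datasets tables, which enforce it with `add_index ..., unique: true`. Second, those composite indexes scope uniqueness per user, which this hypothetical console session illustrates (absent any model-level validation, the duplicate insert fails at the database):

    user = User.create!(labs_user_id: "42")
    Dataset.create!(title: "strikes 1903", user: user)   # fine
    Dataset.create!(title: "strikes 1903", user: user)   # raises ActiveRecord::RecordNotUnique
    other = User.create!(labs_user_id: "43")
    Dataset.create!(title: "strikes 1903", user: other)  # fine: uniqueness is scoped to the user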
diff --git a/db/migrate/20211123112405_create_compound_articles.rb b/db/migrate/20211123112405_create_compound_articles.rb
index 53bc24f..3b34a12 100644
--- a/db/migrate/20211123112405_create_compound_articles.rb
+++ b/db/migrate/20211123112405_create_compound_articles.rb
@@ -1,17 +1,17 @@
 class CreateCompoundArticles < ActiveRecord::Migration[6.1]
-    def change
-        create_table :compound_articles do |t|
-            t.string :title
-            t.string :issue_id
-            t.string :newspaper
-            t.string :date_created
-            t.string :thumbnail_url
-            t.string :language
-            t.text :all_text
-            t.references :user, foreign_key: true
-            t.string :parts, array: true, default: []
-            t.timestamps
-        end
-        add_index :compound_articles, [:title, :user_id], unique: true
+  def change
+    create_table :compound_articles do |t|
+      t.string :title
+      t.string :issue_id
+      t.string :newspaper
+      t.string :date_created
+      t.string :thumbnail_url
+      t.string :language
+      t.text :all_text
+      t.references :user, foreign_key: true
+      t.string :parts, array: true, default: []
+      t.timestamps
+    end
+    add_index :compound_articles, [:title, :user_id], unique: true
+  end
 end
diff --git a/db/schema.rb b/db/schema.rb
index 164227f..bc77a97 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -45,7 +45,7 @@ ActiveRecord::Schema.define(version: 2021_12_02_095539) do
   create_table "experiments", force: :cascade do |t|
     t.string "title"
     t.bigint "user_id"
-    t.jsonb "description", default: {"children"=>[]}
+    t.jsonb "description", default: { "children" => [] }
     t.datetime "created_at", precision: 6, null: false
     t.datetime "updated_at", precision: 6, null: false
     t.index ["title", "user_id"], name: "index_experiments_on_title_and_user_id", unique: true
diff --git a/test/test_helper.rb b/test/test_helper.rb
index 47b598d..d713e37 100644
--- a/test/test_helper.rb
+++ b/test/test_helper.rb
@@ -1,4 +1,4 @@
-ENV['RAILS_ENV'] ||= 'test'
+ENV["RAILS_ENV"] ||= "test"
 require_relative "../config/environment"
 require "rails/test_help"
diff --git a/test/workers/tool_runner_worker_test.rb b/test/workers/tool_runner_worker_test.rb
index 29db595..95a67b1 100644
--- a/test/workers/tool_runner_worker_test.rb
+++ b/test/workers/tool_runner_worker_test.rb
@@ -1,4 +1,5 @@
-require 'test_helper'
+require "test_helper"
+
 class ToolRunnerWorkerTest < Minitest::Test
   def test_example
     skip "add some examples to (or delete) #{__FILE__}"
--
GitLab