Skip to content
class PreprocessWorker
  include Sidekiq::Worker

  # Tokenizes/cleans the documents produced by this tool's parent tool with
  # PragmaticTokenizer, stores the result on the tool, and broadcasts
  # progress + display-refresh events over ActionCable.
  #
  # @param tool_id [Integer] Tool record to run
  # @param user_id [Integer] used to address the "notifications.<id>" channel
  # @param experiment_id [Integer] echoed back in completion_rate events
  # @param tool_type [String] unused here (kept for the common worker signature)
  # @param tool_parameters [Array<Hash>] entries of {"name" => ..., "value" => ...}
  # @param continue [Boolean] when true, resume the experiment from this tool
  def perform(tool_id, user_id, experiment_id, tool_type, tool_parameters, continue = false)
    tool = Tool.find(tool_id)
    tool.status = "running"
    tool.save!
    ActionCable.server.broadcast("notifications.#{user_id}", {
      type: "refresh_display",
      html: ApplicationController.render(partial: "experiment/tree", locals: { experiment: Experiment.find(tool.experiment.id) }),
      message: "Starting job...",
    })
    parent_output = Tool.find(tool.parent_id).results
    docs = parent_output["docs"]
    docs = docs.each_with_index.map do |doc, idx|
      out = {
        type: "completion_rate",
        tool_id: tool.id,
        experiment_id: experiment_id,
        completion: ((idx / (docs.size).to_f) * 100).to_i,
      }
      # Throttle progress events to one every 20 documents.
      ActionCable.server.broadcast("notifications.#{user_id}", out) if idx % 20 == 0
      # FIXME(review): `numbers:` is driven by the "lowercase" parameter below —
      # this looks like a copy-paste slip (a dedicated "numbers" parameter was
      # probably intended). Preserved as-is; confirm before changing.
      doc["text"] = PragmaticTokenizer::Tokenizer.new(
        language: doc["language"],
        remove_stop_words: tool_parameters.select { |t| t["name"] == "stopwords" }[0]["value"],
        punctuation: tool_parameters.select { |t| t["name"] == "punctuation" }[0]["value"] ? "none" : "all",
        numbers: tool_parameters.select { |t| t["name"] == "lowercase" }[0]["value"] ? "none" : "all",
        clean: true,
        downcase: tool_parameters.select { |t| t["name"] == "lowercase" }[0]["value"],
        minimum_length: 3,
      ).tokenize(doc["text"]).join(" ")
      doc
    end
    tool.results = { type: "documents", docs: docs }
    tool.status = "finished"
    tool.save!
    experiment = Experiment.find(tool.experiment.id)
    out = {
      type: "refresh_display",
      html: ApplicationController.render(partial: "experiment/tree", locals: { experiment: experiment }),
      message: "Done.",
    }
    ActionCable.server.broadcast("notifications.#{user_id}", out)
    experiment.continue_from(tool_id) if continue
    if experiment.finished?
      ActionCable.server.broadcast("notifications.#{user_id}", {
        type: "experiment_finished",
        message: "Experiment has finished running.",
      })
    end
  end
end
class SearchToDatasetWorker
  include Sidekiq::Worker
  include ActionView::Helpers::FormOptionsHelper

  # Runs a Solr search, collects all matching document ids (paged 100 at a
  # time), adds them to the dataset, and notifies the user over ActionCable
  # with progress events and a final summary notification.
  #
  # @param user_id [Integer] used to address the "notifications.<id>" channel
  # @param dataset_id [Integer] target Dataset
  # @param search_params [Hash] Solr query params; mutated in place (fl/facet/rows/start)
  # @param time [Object] client-supplied token echoed back in completion_rate events
  def perform(user_id, dataset_id, search_params, time)
    puts "### #{search_params}"
    dataset = Dataset.find(dataset_id)
    # Only ids are needed; disable facets and page 100 rows at a time.
    search_params["fl"] = "id"
    search_params["facet"] = false
    search_params["rows"] = 100
    search_params["start"] = 0
    doc_ids = []
    res = SolrSearcher.query search_params
    num_found = res["response"]["numFound"]
    doc_ids.concat res["response"]["docs"].map { |d| d["id"] }
    while doc_ids.size < num_found
      search_params["start"] += 100
      res = SolrSearcher.query search_params
      num_found = res["response"]["numFound"]
      doc_ids.concat res["response"]["docs"].map { |d| d["id"] }
      completion = (100 * doc_ids.size / num_found).to_i
      ActionCable.server.broadcast("notifications.#{user_id}", {
        type: "completion_rate",
        dataset_id: dataset_id,
        time: time,
        completion: completion,
      })
    end
    # add_documents returns the ids that were already in the dataset.
    existing = dataset.add_documents doc_ids
    nb_docs_added = doc_ids.size - existing.size
    content = "<p>#{nb_docs_added} document#{nb_docs_added > 1 ? "s were" : " was"} added to your dataset <strong>\"#{dataset.title}\"</strong></p>"
    content.concat "<p>#{existing.size} document#{existing.size > 1 ? "s already exist" : " already exists"} in this dataset.</p>" unless existing.empty?
    # TODO: next line may cause bugs with the working dataset
    dataset_options = options_for_select(User.find(user_id).datasets.map { |d| ["#{d.title} (#{d.documents.size} docs)", d.id] })
    ActionCable.server.broadcast("notifications.#{user_id}", {
      type: "notify",
      html: ApplicationController.render(partial: "shared/notification", locals: { notif_title: dataset.title, notif_content: content, notif_autohide: "true" }),
      dataset_options: dataset_options,
    })
  end
end
class SourceDatasetWorker
  include Sidekiq::Worker

  # Loads every document of the dataset chosen in the tool parameters and
  # stores them as this tool's results, broadcasting progress and
  # display-refresh events along the way.
  #
  # @param tool_id [Integer] Tool record to run
  # @param user_id [Integer] used to address the "notifications.<id>" channel
  # @param experiment_id [Integer] echoed back in completion_rate events
  # @param tool_type [String] unused here (kept for the common worker signature)
  # @param tool_parameters [Array<Hash>] entries of {"name" => ..., "value" => ...}
  # @param continue [Boolean] when true, resume the experiment from this tool
  def perform(tool_id, user_id, experiment_id, tool_type, tool_parameters, continue = false)
    tool = Tool.find(tool_id)
    tool.status = "running"
    tool.save!
    ActionCable.server.broadcast("notifications.#{user_id}", {
      type: "refresh_display",
      html: ApplicationController.render(partial: "experiment/tree", locals: { experiment: Experiment.find(tool.experiment.id) }),
      message: "Starting job...",
    })
    docs = fetch_docs_from_dataset(tool_id, experiment_id, user_id, tool_parameters.select { |t| t["name"] == "dataset" }[0]["value"])
    tool.results = { type: "documents", docs: docs }
    tool.status = "finished"
    tool.save!
    experiment = Experiment.find(tool.experiment.id)
    ActionCable.server.broadcast("notifications.#{user_id}", {
      type: "refresh_display",
      html: ApplicationController.render(partial: "experiment/tree", locals: { experiment: experiment }),
      message: "Done.",
    })
    experiment.continue_from(tool_id) if continue
    if experiment.finished?
      ActionCable.server.broadcast("notifications.#{user_id}", {
        type: "experiment_finished",
        message: "Experiment has finished running.",
      })
    end
  end

  # Pages through the dataset 100 documents at a time, broadcasting a
  # completion_rate event after each page.
  #
  # @return [Array<Hash>] one hash per document with id/newspaper/language/text/date
  def fetch_docs_from_dataset(tool_id, experiment_id, user_id, dataset_id)
    dataset = Dataset.find(dataset_id)
    all_docs = []
    docs = []
    page = 0
    # A page shorter than 100 docs marks the last page. (When the total is an
    # exact multiple of 100 this performs one extra, empty fetch — preserved
    # original behavior.)
    while page == 0 || docs.size == 100
      page += 1
      docs = dataset.fetch_paginated_documents(page, 100, "default", "asc", "all")[:docs]
      all_docs.concat docs
      ActionCable.server.broadcast("notifications.#{user_id}", {
        type: "completion_rate",
        tool_id: tool_id,
        experiment_id: experiment_id,
        completion: dataset.documents.size == 0 ? 0 : ((all_docs.size.to_f / dataset.documents.size) * 100).to_i,
      })
    end
    all_docs.map do |doc|
      {
        id: doc.id,
        newspaper: doc.newspaper,
        language: doc.language,
        text: doc.all_text,
        date: doc.date_created,
      }
    end
  end
end
class SplitterWorker
  include Sidekiq::Worker

  # Not implemented yet: no-op placeholder matching the common worker
  # signature so splitter tools can be enqueued without crashing.
  def perform(tool_id, user_id, experiment_id, tool_type, tool_parameters)
  end
end
......@@ -7,22 +7,33 @@ require "rails/all"
Bundler.require(*Rails.groups)
module NewspapersPlatform
  class Application < Rails::Application
    # Initialize configuration defaults for originally generated Rails version.
    config.load_defaults 6.1

    # Configuration for the application, engines, and railties goes here.
    #
    # These settings can be overridden in specific environments using the files
    # in config/environments, which are processed later.
    #
    # config.time_zone = "Central Time (US & Canada)"
    # config.eager_load_paths << Rails.root.join("extras")

    # Solr connection settings (config/solr.yml).
    config.solr = config_for("solr")
    # Optional external-auth settings; only loaded when the file exists.
    if File.exist? "#{Rails.root}/config/auths.yml"
      config.auths = config_for("auths")
    end
    # config.active_job.queue_adapter = :sidekiq
    # IIIF image-source endpoints (config/iiif_sources.yml).
    config.iiif_sources = config_for("iiif_sources")

    # Allow requests from different origins
    # config.middleware.use Rack::Cors do
    #   allow do
    #     origins "*"
    #     resource "*",
    #       headers: :any,
    #       expose: %w(access-token expiry token-type uid client),
    #       methods: %i(post)
    #   end
    # end
  end
end
# Point Bundler at the app's Gemfile unless the caller already set one.
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__)

require "bundler/setup" # Set up gems listed in the Gemfile.
require "bootsnap/setup" # Speed up boot time by caching expensive operations.
development:
  adapter: redis
  url: <%= ENV.fetch("REDIS_URL") { "redis://localhost:6379/1" } %>
  channel_prefix: newspapers_platform_production_dev
test:
  adapter: test
......
GuFP+gXR5rvGVfgEYis0v/Rw/zXs4gnzV7WbFKql5Aau62F+BA/erMRTlm5iqNkcOW9WP4TNQiVraHKtVoGny9FAt7mcSI5h9oBz+TmV7WYJdm0aGv+D1MPxUZo9vXG/QQ3ESophQwXKUeMmw4WBtIt6v6zcCAVXTTWAZmLlYHRNxtoqM/ivDrWRT/CJugJMGjkzVNqfqgnSOQy5rC/SzStpthYm5YST+nR2+zedzir6XEiWyrQMy/0dMjxl+Mw7+vtXLGupdfpnJUdWu00YgoHJuHhs0opsUmLUA8lX2qTyDfIZfHgPoOhO86XXuzxlSNBJCh6NCg64DhCVdwZxks1ZUBqN/n/h7z46Wfjbc+mXE57LLRtQdW52wLZPNvfuxlaJOj3E1jhHzWBcrNwsOvBhOfLxhy9rFEZH--NwDMcOhk+Fv/AUWL--x0Lb5Ut4UAJoQ8K6rPDVbg==
\ No newline at end of file
......@@ -5,15 +5,14 @@
# gem 'sqlite3'
#
default: &default
  adapter: postgresql
  host: <%= ENV.fetch('NEP_DB_HOSTNAME', 'localhost') %>
  port: 5432
  encoding: utf8
  pool: 5
  user: postgres
  password: secret
  timeout: 5000
development:
<<: *default
......@@ -24,7 +23,7 @@ development:
# Do not set this db to the same as development or production.
test:
<<: *default
  database: db_test
production:
<<: *default
......
......@@ -3,7 +3,7 @@ require "active_support/core_ext/integer/time"
Rails.application.configure do
# Settings specified here will take precedence over those in config/application.rb.
config.action_mailer.default_url_options = { host: "localhost", port: 3000 }
# In the development environment your application's code is reloaded any time
# it changes. This slows down response time but is perfect for development
......@@ -18,13 +18,13 @@ Rails.application.configure do
# Enable/disable caching. By default caching is disabled.
# Run rails dev:cache to toggle caching.
if Rails.root.join("tmp", "caching-dev.txt").exist?
config.action_controller.perform_caching = true
config.action_controller.enable_fragment_cache_logging = true
config.cache_store = :memory_store
config.public_file_server.headers = {
  "Cache-Control" => "public, max-age=#{2.days.to_i}",
}
else
config.action_controller.perform_caching = false
......@@ -73,9 +73,10 @@ Rails.application.configure do
# routes, locales, etc. This feature depends on the listen gem.
config.file_watcher = ActiveSupport::EventedFileUpdateChecker
# Uncomment if you wish to allow Action Cable access from any origin.
# config.action_cable.disable_request_forgery_protection = true
# allow all hosts in development
# config.hosts.clear
# Allow Action Cable access from any origin.
config.action_cable.url = ENV['NEP_CABLE_URL'] || 'http://127.0.0.1:3000/cable'
config.action_cable.allowed_request_origins = [ENV['NEP_LABS_DOMAIN'], 'http://127.0.0.1:8001']
# Allow requests from all hosts for now
config.hosts.clear
end
......@@ -13,7 +13,7 @@ Rails.application.configure do
config.eager_load = true
# Full error reports are disabled and caching is turned on.
config.consider_all_requests_local = false
config.action_controller.perform_caching = true
# Ensures that a master key has been made available in either ENV["RAILS_MASTER_KEY"]
......@@ -22,7 +22,7 @@ Rails.application.configure do
# Disable serving static files from the `/public` folder by default since
# Apache or NGINX already handles this.
config.public_file_server.enabled = ENV["RAILS_SERVE_STATIC_FILES"].present?
# Compress CSS using a preprocessor.
# config.assets.css_compressor = :sass
......@@ -53,7 +53,7 @@ Rails.application.configure do
config.log_level = :info
# Prepend all log lines with the following tags.
config.log_tags = [:request_id]
# Use a different cache store in production.
# config.cache_store = :mem_cache_store
......@@ -89,9 +89,9 @@ Rails.application.configure do
# config.logger = ActiveSupport::TaggedLogging.new(Syslog::Logger.new 'app-name')
if ENV["RAILS_LOG_TO_STDOUT"].present?
logger = ActiveSupport::Logger.new(STDOUT)
logger = ActiveSupport::Logger.new(STDOUT)
logger.formatter = config.log_formatter
config.logger = ActiveSupport::TaggedLogging.new(logger)
config.logger = ActiveSupport::TaggedLogging.new(logger)
end
# Do not dump schema after migrations.
......
......@@ -19,11 +19,11 @@ Rails.application.configure do
# Configure public file server for tests with Cache-Control for performance.
config.public_file_server.enabled = true
config.public_file_server.headers = {
'Cache-Control' => "public, max-age=#{1.hour.to_i}"
"Cache-Control" => "public, max-age=#{1.hour.to_i}",
}
# Show full error reports and disable caching.
config.consider_all_requests_local = true
config.consider_all_requests_local = true
config.action_controller.perform_caching = false
config.cache_store = :null_store
......
# Be sure to restart your server when you modify this file.
# Version of your assets, change this if you want to expire all your assets.
Rails.application.config.assets.version = "1.0"
# Add additional assets to the asset load path.
# Rails.application.config.assets.paths << Emoji.images_path
# Add Yarn node_modules folder to the asset load path.
Rails.application.config.assets.paths << Rails.root.join("node_modules")
# Precompile additional assets.
# application.js, application.css, and all non-JS/CSS in the app/assets
......
# Allow requests from Labs domain
module AddProxyRequestOrigin
  extend ActionController::RequestForgeryProtection

  # Extra origins (besides the app's own base URL) accepted for
  # state-changing requests. A constant is required here: the original
  # module-body local was not visible inside the method below.
  ALLOWED_REQUEST_ORIGINS = [ENV["NEP_LABS_DOMAIN"], "http://127.0.0.1:8001"].freeze

  # Same as Rails' origin check, but additionally accepts the Labs origins.
  def valid_request_origin? # :doc:
    if forgery_protection_origin_check
      # We accept blank origin headers because some user agents don't send it.
      raise InvalidAuthenticityToken, NULL_ORIGIN_MESSAGE if request.origin == "null"
      request.origin.nil? || request.origin == request.base_url || ALLOWED_REQUEST_ORIGINS.include?(request.origin)
    else
      true
    end
  end
end
This diff is collapsed.
......@@ -2,5 +2,5 @@
# Configure sensitive parameters which will be filtered from the log file.
Rails.application.config.filter_parameters += [
  :passw, :secret, :token, :_key, :crypt, :salt, :certificate, :otp, :ssn,
]
# Cookie-based session storage (replaces the former active_record_store,
# so no DB session table needs trimming anymore).
Rails.application.config.session_store :cookie_store, :key => "_web_session"
# Single Redis connection config shared by the Sidekiq server and client,
# overridable via REDIS_URL (e.g. "redis://redis:6379/1" under Docker).
redis = { url: ENV.fetch("REDIS_URL", "redis://localhost:6379/1") }

Sidekiq.configure_server do |config|
  config.redis = redis
end

Sidekiq.configure_client do |config|
  config.redis = redis
end
# Mirror the Warden/Devise login state into signed cookies.
# NOTE(review): presumably these cookies are read by another service (e.g. a
# proxy in front of the app) to check authentication — verify the consumer.

# On sign-in: record the user id and a 30-minute expiry.
Warden::Manager.after_set_user do |user, auth, opts|
auth.cookies.signed["user.id"] = user.id
auth.cookies.signed["user.expires_at"] = 30.minutes.from_now
end
# On sign-out: clear both cookies.
Warden::Manager.before_logout do |user, auth, opts|
auth.cookies.signed["user.id"] = nil
auth.cookies.signed["user.expires_at"] = nil
end
\ No newline at end of file
# Additional translations at https://github.com/heartcombo/devise/wiki/I18n
en:
devise:
confirmations:
confirmed: "Your email address has been successfully confirmed."
send_instructions: "You will receive an email with instructions for how to confirm your email address in a few minutes."
send_paranoid_instructions: "If your email address exists in our database, you will receive an email with instructions for how to confirm your email address in a few minutes."
failure:
already_authenticated: "You are already signed in."
inactive: "Your account is not activated yet."
invalid: "Invalid %{authentication_keys} or password."
locked: "Your account is locked."
last_attempt: "You have one more attempt before your account is locked."
not_found_in_database: "Invalid %{authentication_keys} or password."
timeout: "Your session expired. Please sign in again to continue."
unauthenticated: "You need to sign in or sign up before continuing."
unconfirmed: "You have to confirm your email address before continuing."
mailer:
confirmation_instructions:
subject: "Confirmation instructions"
reset_password_instructions:
subject: "Reset password instructions"
unlock_instructions:
subject: "Unlock instructions"
email_changed:
subject: "Email Changed"
password_change:
subject: "Password Changed"
omniauth_callbacks:
failure: "Could not authenticate you from %{kind} because \"%{reason}\"."
success: "Successfully authenticated from %{kind} account."
passwords:
no_token: "You can't access this page without coming from a password reset email. If you do come from a password reset email, please make sure you used the full URL provided."
send_instructions: "You will receive an email with instructions on how to reset your password in a few minutes."
send_paranoid_instructions: "If your email address exists in our database, you will receive a password recovery link at your email address in a few minutes."
updated: "Your password has been changed successfully. You are now signed in."
updated_not_active: "Your password has been changed successfully."
registrations:
destroyed: "Bye! Your account has been successfully cancelled. We hope to see you again soon."
signed_up: "Welcome! You have signed up successfully."
signed_up_but_inactive: "You have signed up successfully. However, we could not sign you in because your account is not yet activated."
signed_up_but_locked: "You have signed up successfully. However, we could not sign you in because your account is locked."
signed_up_but_unconfirmed: "A message with a confirmation link has been sent to your email address. Please follow the link to activate your account."
update_needs_confirmation: "You updated your account successfully, but we need to verify your new email address. Please check your email and follow the confirmation link to confirm your new email address."
updated: "Your account has been updated successfully."
updated_but_not_signed_in: "Your account has been updated successfully, but since your password was changed, you need to sign in again."
sessions:
signed_in: "Signed in successfully."
signed_out: "Signed out successfully."
already_signed_out: "Signed out successfully."
unlocks:
send_instructions: "You will receive an email with instructions for how to unlock your account in a few minutes."
send_paranoid_instructions: "If your account exists, you will receive an email with instructions for how to unlock it in a few minutes."
unlocked: "Your account has been unlocked successfully. Please sign in to continue."
errors:
messages:
already_confirmed: "was already confirmed, please try signing in"
confirmation_period_expired: "needs to be confirmed within %{period}, please request a new one"
expired: "has expired, please request a new one"
not_found: "not found"
not_locked: "was not locked"
not_saved:
one: "1 error prohibited this %{resource} from being saved:"
other: "%{count} errors prohibited this %{resource} from being saved:"