Skip to content
......@@ -51,5 +51,5 @@
<% else %>
<%= facet['val'] %>
<% end %>
<span class="badge rounded-pill bg-primary float-end"><%= facet['count'] %></span>
<span class="badge rounded-pill bg-info float-end"><%= facet['count'] %></span>
</li>
\ No newline at end of file
......@@ -11,10 +11,10 @@
<%= options_for_select(current_user.datasets.map{ |d| ["#{d.title} (#{d.documents.size} docs)", d.id]}, session[:working_dataset]) %>
</select>
</div>
<button class="btn btn-sm btn-secondary" data-action="click->search-results#addSelectedDocumentsToWorkingDataset">
<button class="btn btn-sm btn-info" data-action="click->search-results#addSelectedDocumentsToWorkingDataset">
Add selected documents
</button>
<button class="btn btn-sm btn-secondary"
<button class="btn btn-sm btn-info"
data-search-params="<%= @solr_params.to_json %>"
data-action="click->search-results#addAllDocumentsToWorkingDataset">
Add all documents
......
......@@ -5,7 +5,7 @@
<% search_constraints.each do |constraint| %>
<% field = t('newspapers.solr_fields').find { |key,value| value == constraint[:label] }[0].to_s %>
<% next if field == "doc_type" %>
<span class="btn-group">
<span class="btn-group btn-group-sm">
<button class="btn btn-sm btn-outline-primary" disabled>
<span class="filter_name">
<%= t("newspapers.human_readable_solr_fields." + field) %>
......@@ -39,9 +39,7 @@
<% else %>
<% new_params[:f][constraint[:label].to_sym].delete(constraint[:value]) %>
<% end %>
<a class="btn btn-sm btn-outline-primary" href="<%= url_for new_params %>">
<i class="fas fa-times"></i>
</a>
<a class="btn btn-sm btn-outline-primary" href="<%= url_for new_params %>"><span class="fas fa-times"></span></a>
</span>
<% end %>
</div>
......
......@@ -27,7 +27,7 @@
<% end %>
</div>
<div class="col-4">
<a href="<%= url %>"><img class="h-auto w-100" src="<%= result.get_thumbnail %>"/></a>
<a href="<%= url %>"><img class="h-auto w-auto" src="<%= result.get_thumbnail %>"/></a>
</div>
</div>
<div class="in_datasets">
......
......@@ -4,7 +4,7 @@
Currently belongs to:
<% in_datasets.each do |dataset_id, dataset_title| %>
<%= link_to "/dataset/#{dataset_id}" do %>
<span class="dataset-<%= dataset_id %> badge rounded-pill bg-primary"><%= dataset_title %></span>
<span class="dataset-<%= dataset_id %> badge rounded-pill bg-info"><%= dataset_title %></span>
<% end %>
<% end %>
</p>
......
<div id="search_forms">
<div id="search_form" class="mb-2">
<form action="/search" method="get" class="row">
<div class="col-3 btn-group" role="group">
<form action="./search" method="get" class="row">
<div class="col-3 btn-group h-50" role="group">
<input type="radio" class="btn-check" name="search_type" value="exact" id="exact_search" autocomplete="off" <%= "checked" if @search_type.nil? || @search_type == "exact" %>>
<label class="btn btn-outline-primary" for="exact_search">Exact search</label>
<input type="radio" class="btn-check" name="search_type" value="stemmed" id="stemmed_search" autocomplete="off" <%= "checked" if @search_type == "stemmed" %>>
<label class="btn btn-outline-primary" for="stemmed_search">Stemmed search</label>
</div>
<div class="col-9 w-50 input-group flex-nowrap">
<span class="input-group-text" id="addon-wrapping">Search for:</span>
<input name="q" type="text" class="form-control" value="<%= @solr_params[:q] if @solr_params and @solr_params[:q] %>">
<input class="btn btn-outline-secondary" type="submit" value="Go"></input>
<div class="col w-50 input-group mb-3 flex-nowrap">
<span class="input-group-text mb-3" id="addon-wrapping">Search for:</span>
<input name="q" type="text" class="form-control mb-3" value="<%= @solr_params[:q] if @solr_params and @solr_params[:q] %>">
<input class="btn btn-outline-secondary mb-3" type="submit" value="Go">
</div>
</form>
</div>
......
<% set_page_title "Newspaper Platform" %>
<div class="d-flex flex-column justify-content-center align-items-center">
<div class="d-flex align-items-center justify-content-between" style="height: 30vh;">
<%= image_pack_tag "media/images/newseye_logo.png", class: "mh-100 ms-auto me-auto d-block" %>
<div class="d-flex flex-column justify-content-center align-items-center">
<%= image_pack_tag "media/images/eu_flag_yellow.jpg", size: "224x150"%>
<p class="w-50">
<div class="row justify-content-center align-items-center">
<div class="col" style="text-align: center">
<p style="background:white; display: inline-block; padding: 10px"><%= image_pack_tag "media/images/info_symbol.png", size: "35x35", style: "padding:5px" %> To start the Annolyzer<br> click on 'Search' or 'Datasets'</p>
</div>
<div class="col" style="text-align: center">
<%= image_pack_tag "media/images/newseye_logo.png", class: "mh-100 ms-auto me-auto d-block", size: "223x183", style: "margin:30px" %>
<%= image_pack_tag "media/images/eu_flag_yellow.jpg", class: "mh-100 ms-auto me-auto d-block", size: "157x105", style: "margin:30px" %>
<p class="w-50" style="display: inline-block">
This project has received funding from the
European Union’s Horizon 2020 research and
innovation programme under grant agreement
No 770299.
</p>
</div>
</div>
<div class="mt-5 w-75">
<h4>Available data</h4>
<p>
Grâce à cette plateforme, il est possible de rechercher de l’information dans la vingtaine de titres disponibles, en cinq langues différentes.
Ces titres de presse ont été fournis par les trois bibliothèques partenaires du projet : la Bibliothèque Nationale de France (BNF),
la Bibliothèque Nationale d’Autriche (ONB) et la Bibliothèque Nationale de Finlande (NLF). En voici la liste :
<ul class="list-group list-group-horizontal">
<li class="list-group-item">
<h5>BNF (French)</h5>
<ul class="list-group list-group-flush">
<li class="list-group-item"><span class="text-decoration-underline">La Presse</span> : 1850-1890</li>
<li class="list-group-item"><span class="text-decoration-underline">Le Matin</span> : 1884-1944</li>
<li class="list-group-item"><span class="text-decoration-underline">La Fronde</span> : 1897-1929</li>
<li class="list-group-item"><span class="text-decoration-underline">Marie-Claire</span> : 1937-1944</li>
<li class="list-group-item"><span class="text-decoration-underline">L’Œuvre</span> : 1915-1944</li>
<li class="list-group-item"><span class="text-decoration-underline">Le Gaulois</span> : 1868-1900</li>
</ul>
</li>
<li class="list-group-item">
<h5>ONB (German)</h5>
<ul class="list-group list-group-flush">
<li class="list-group-item"><span class="text-decoration-underline">Neue Freie Presse</span> : 1864-1873, 1895-1900, 1911-1922 et 1933-1939</li>
<li class="list-group-item"><span class="text-decoration-underline">Illustrierte Kronen Zeitung</span> : 1911-1922 et 1933-1939</li>
<li class="list-group-item"><span class="text-decoration-underline">Innsbrucker Nachrichten</span> : 1864-1873, 1895-1900, 1911-1922 et 1933-1939</li>
<li class="list-group-item"><span class="text-decoration-underline">Arbeiter Zeitung</span> : 1895-1900, 1911-1922 et 1933-1939</li>
</ul>
</li>
<li class="list-group-item">
<h5>NLF (Finnish)</h5>
<ul class="list-group list-group-flush">
<li class="list-group-item"><span class="text-decoration-underline">Sanomia Turusta</span> : 1850-1900</li>
<li class="list-group-item"><span class="text-decoration-underline">Aura</span> : 1880-1896</li>
<li class="list-group-item"><span class="text-decoration-underline">Uusi Aura</span> : 1897-1918</li>
<li class="list-group-item"><span class="text-decoration-underline">Suometar</span> : 1847-1866</li>
<li class="list-group-item"><span class="text-decoration-underline">Uusi Suometar</span> : 1869-1918</li>
<li class="list-group-item"><span class="text-decoration-underline">Païvalehti</span> : 1889-1904</li>
<li class="list-group-item"><span class="text-decoration-underline">Helsingin Sanomat</span> : 1904-1918</li>
</ul>
</li>
<li class="list-group-item">
<h5>NLF (Swedish)</h5>
<ul class="list-group list-group-flush">
<li class="list-group-item"><span class="text-decoration-underline">Åbo Underrättelser</span> : 1824-1827 et 1829-1918</li>
<li class="list-group-item"><span class="text-decoration-underline">Västra Finland</span> : 1895-1918</li>
<li class="list-group-item"><span class="text-decoration-underline">Hufvudstadsbladet</span> : 1864-1918</li>
</ul>
</li>
</ul>
</p>
</div>
</div>
\ No newline at end of file
......@@ -10,19 +10,19 @@
<a href="/dataset/<%= dataset.id %>"><%= dataset.title %></a>
</div>
<div class="d-flex align-items-center justify-content-center">
<span class="badge bg-primary rounded-pill"><%= dataset.nb_issues %> issues</span>
<span class="badge bg-primary rounded-pill"><%= dataset.nb_articles %> articles</span>
<span class="badge bg-primary rounded-pill"><%= dataset.nb_compound_articles %> compound articles</span>
<span class="badge bg-info rounded-pill"><%= dataset.nb_issues %> issues</span>
<span class="badge bg-info rounded-pill"><%= dataset.nb_articles %> articles</span>
<span class="badge bg-info rounded-pill"><%= dataset.nb_compound_articles %> compound articles</span>
</div>
<div class="d-inline-flex">
<button type="button"
class="btn btn-sm btn-danger px-2 d-flex align-items-center justify-content-between me-2"
class="btn btn-sm btn-danger text-white px-2 d-flex align-items-center justify-content-between me-2"
data-action="click->datasets#deleteDataset">
<i class="me-2 fas fa-trash"></i>Delete
</button>
<button type="button"
data-bs-toggle="modal" data-bs-target="#renameDatasetModal" data-bs-dataset-id="<%= dataset.id %>"
class="btn btn-sm btn-info px-2 d-flex align-items-center justify-content-between">
class="btn btn-sm btn-info text-white px-2 d-flex align-items-center justify-content-between">
<i class="me-2 fas fa-edit"></i>Rename
</button>
</div>
......
......@@ -3,15 +3,13 @@
data-action="click->dataset#toggleResultSelection"
data-doc-id="<%= doc.id %>">
<h5>
<%
if doc.is_a?(Article)
url = "/catalog/#{doc.issue_id}?selected=#{doc.id}"
<% if doc.is_a?(Article)
url = "/catalog/#{doc.issue_id}?selected=#{doc.id}"
elsif doc.is_a?(CompoundArticle)
url = "/catalog/#{doc.issue_id}?selected_compound=#{doc.id}"
url = "/catalog/#{doc.issue_id}?selected_compound=#{doc.id}"
else
url = "/catalog/#{doc.id}"
end
%>
url = "/catalog/#{doc.id}"
end %>
<a href="<%= url %>">
<span class="doc_index"><%= doc_index %>.</span>
<%= doc.is_a?(Article) ? doc.id : doc.title %>
......@@ -30,13 +28,13 @@
</div>
<div class="col-4">
<% if !highlight.nil? and !highlight.first.nil? %>
<% highlight.first[1].each do |extract| %>
<p>...<%= extract.gsub(/\n/,'<br/>').gsub('<br/><br/>','<br/>').html_safe %>...</p>
<% end %>
<%# highlight.first[1].each do |extract| %>
<p><%= highlight[0..250] + '...' unless doc.is_a?(Issue) %><%#= extract.gsub(/\n/, "<br/>").gsub("<br/><br/>", "<br/>").html_safe %></p>
<%# end %>
<% end %>
</div>
<div class="col-4">
<a href="<%= url %>"><img src="<%= doc.thumbnail_url %>"/></a>
<a href="<%= url %>"><img class="h-auto w-auto" src="<%= doc.thumbnail_url %>"/></a>
</div>
</div>
</div>
\ No newline at end of file
<div class="list-group list-group-flush w-100">
<% docs.each_with_index do |doc, idx| %>
<%# Highlight will be used when searching into a dataset %>
<%= render partial: "document", locals: {doc: doc, highlight: nil, doc_index: (pagenum-1) * rows + idx + 1} %>
<%= render partial: "document", locals: { doc: doc, highlight: doc.all_text, doc_index: (pagenum - 1) * rows + idx + 1 } %>
<hr/>
<% end %>
</div>
\ No newline at end of file
......@@ -7,9 +7,18 @@
<% if (nb_pages > 10) %>
<% (1..nb_pages).each do |i| %>
<% if (i >= current_page-2 and i <= current_page+2) or (i <= 1) or (i>= nb_pages) %>
<li class="page-item<%= " active" if current_page == i %>" data-action="click->dataset#page_button">
<a class="page-link" href="#"><%= i %></a>
</li>
<% if i == current_page %>
<li class="page-item active">
<form class="page-link" data-action="dataset#page_select">
<input type="text" data-dataset-target="inputPage" value="<%= i %>" style="width:25px; height:20px; text-align:center">
<input type="submit" class="sr-only" tabindex="-1">
</form>
</li>
<% else %>
<li class="page-item" data-action="click->dataset#page_button">
<a class="page-link" href="#"><%= i %></a>
</li>
<% end %>
<% elsif (i == 2 and current_page >= 5) or (i == nb_pages-1 and current_page <= nb_pages-4) %>
<li class="page-item disabled">
<a class="page-link" href="#">...</a>
......@@ -18,9 +27,18 @@
<% end %>
<% else %>
<% (1..nb_pages).each do |i| %>
<li class="page-item<%= " active" if current_page == i %>" data-action="click->dataset#page_button">
<a class="page-link" href="#"><%= i %></a>
</li>
<% if i == current_page %>
<li class="page-item active">
<form class="page-link" data-action="dataset#page_select">
<input type="text" data-dataset-target="inputPage" value="<%= i %>" style="width:25px; height:20px; text-align:center">
<input type="submit" class="sr-only" tabindex="-1">
</form>
</li>
<% else %>
<li class="page-item" data-action="click->dataset#page_button">
<a class="page-link" href="#"><%= i %></a>
</li>
<% end %>
<% end %>
<% end %>
<li class="page-item<%= " disabled" if current_page == nb_pages %>" data-action="click->dataset#next_page">
......
......@@ -28,7 +28,8 @@
</td>
<td>
<% user = User.find(d.user_id) %>
<% username = user.email[0...user.email.index('@')] %>
<%# <% username = user.email[0...user.email.index('@')] %>
<% username = user.labs_user_name %>
<%= username %>
</td>
<td>
......
......@@ -2,7 +2,8 @@
$("#notifications").append("<%= j render(partial: "shared/notification",
locals: {
notif_title: "Dataset modified",
notif_content: content
notif_content: content,
notif_autohide: "true"
}) %>")
for(const notif of $('.toast')) {
......
......@@ -2,7 +2,8 @@
$("#notifications").append("<%= j render(partial: "shared/notification",
locals: {
notif_title: "Working dataset",
notif_content: content
notif_content: content,
notif_autohide: "true"
}) %>")
for(let notif of $('.toast')) {
......
......@@ -3,8 +3,9 @@
<div class="row"
data-controller="dataset"
data-dataset-id-value="<%= @dataset.id %>"
data-dataset-current-page-value="1"
data-dataset-per-page-value="10"
data-dataset-current-page-value="<%= @current_page %>"
data-dataset-nb-pages-value="<%= @nb_pages %>"
data-dataset-per-page-value="<%= @per_page %>"
data-dataset-sort-value="default"
data-dataset-sort-order-value="asc"
data-dataset-selected-documents-value="">
......
......@@ -32,6 +32,6 @@
<%= yield %>
</div>
<div id="notifications" class="toast-container position-absolute bottom-0 end-0 p-3"></div>
<div id="notifications" class="toast-container position-fixed bottom-0 end-0 p-3"></div>
</body>
</html>
<div id="navigation" class="d-flex align-items-center px-3 mb-2 border-bottom border-2 bg-light">
<div id="navigation" class="d-flex align-items-center px-3 mb-2 border-bottom border-2 bg-light" style="border-top:7px solid;border-top-color:#b3c3c7">
<nav class="w-100 p-0 navbar navbar-expand-lg">
<a class="navbar-brand" href="/">
<%= image_pack_tag "media/images/newseye_logo_small.png" %>
<%= image_pack_tag "media/images/newseye_logo_small.png", size: "65x40", style: "padding:5px" %>
</a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarContent" aria-controls="navbarContent" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
......@@ -15,18 +15,9 @@
<li class="me-5 nav-item">
<a class="<%= "border-bottom border-4" if request.path.include? "/dataset" %> h5 nav-link" href="/datasets">Datasets</a>
</li>
<li class="me-5 nav-item">
<a class="<%= "border-bottom border-4" if request.path.include? "/experiment" %> h5 nav-link" href="/experiments">Experiments</a>
</li>
<li class="me-5 nav-item dropdown" style="margin-left: 10vw">
<a class="h5 nav-link dropdown-toggle" href="#" id="profileDropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<i class="fas fa-user"></i>
</a>
<ul class="dropdown-menu" aria-labelledby="profileDropdown">
<%= link_to "Edit Profile", edit_user_registration_path, class: "dropdown-item" %>
<%= link_to "Disconnect", destroy_user_session_path, method: :delete, class: "dropdown-item" %>
</ul>
</li>
<!-- <li class="me-5 nav-item">-->
<!-- <a class="<%= "border-bottom border-4" if request.path.include? "/experiment" %> h5 nav-link" href="/experiments">Experiments</a>-->
<!-- </li>-->
<!-- <li class="me-5 nav-item dropdown">-->
<!-- <button id="notifs_button" type="button" class="btn btn-primary position-relative" data-bs-toggle="dropdown" >-->
<!-- Notifications-->
......@@ -49,9 +40,6 @@
<!-- <li class="dropdown-item"><%#= link_to "EN", "/locale/en", remote: true %></li>-->
<!-- </ul>-->
<!-- </li>-->
<li class="me-5 nav-item">
<a class="h5 nav-link">Help</a>
</li>
<% else %>
<li class="nav-item dropdown">
<a class="h5 nav-link dropdown-toggle" href="#" id="profileDropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
......
<div class="toast" role="alert" aria-live="assertive" aria-atomic="true">
<div class="toast" role="alert" aria-live="assertive" aria-atomic="true" data-bs-autohide=<%= notif_autohide %>>
<div class="toast-header">
<strong class="me-auto"><%= notif_title %></strong>
<small class="text-muted">just now</small>
......
require "zip"
class ExportDatasetWorker
include Sidekiq::Worker
include ActionView::Helpers::FormOptionsHelper
include Sidekiq::Worker
include ActionView::Helpers::FormOptionsHelper
def perform(user_id, dataset_id, export_type)
dataset = Dataset.find(dataset_id)
file = Tempfile.new(["export_#{dataset.title.parameterize(separator: '_')}_", ".#{export_type}"], "tmp")
to_write = []
named_entities = dataset.named_entities
named_entities = named_entities.values.map{|h| h.values }.flatten
documents = dataset.fetch_paginated_documents(1, 100, "default", "asc", "all", recursive=true)
documents[:docs].map do |doc|
case export_type
when "json"
lang = doc.language
thumb = doc.thumbnail_url
if doc.is_a?(CompoundArticle)
doc_type = "compound"
entities = named_entities.select{ |ne| doc.parts.include? ne['article_id_ssi'] }
elsif doc.is_a?(Article)
doc_type = "article"
entities = named_entities.select{ |ne| doc.id == ne['article_id_ssi'] }
elsif doc.is_a?(Issue)
doc_type = "issue"
entities = named_entities.select{ |ne| doc.id == ne['issue_id_ssi'] }
end
entities = entities.map do |ne|
{
mention: ne['mention_ssi'],
indexStart: ne['article_index_start_isi'],
indexEnd: ne['article_index_end_isi'],
stance: if ne['stance_fsi'] == 0
"neutral"
else
ne['stance_fsi'] > 0 ? "positive" : "negative"
end,
linked_entity_url: ne['linked_entity_ssi'] == "" || ne['linked_entity_ssi'].nil? ? nil : "https://www.wikidata.org/wiki/#{ne['linked_entity_ssi'].split('_')[-1]}"
}
end
unless under_copyright(lang, doc.date_created, User.find(user_id))
to_write << { id: doc.id,
type: doc_type,
language: lang,
date: doc.date_created,
newspaper_id: doc.newspaper,
iiif_url: thumb,
text: doc.all_text,
named_entities: entities }
end
when "zip"
filename = "#{doc.date_created}_#{doc.is_a?(CompoundArticle) ? "compound_#{doc.title}" : doc.id}.txt"
file_content = doc.all_text
File.open("/tmp/#{filename}", 'w') do |f|
f.write file_content
end
to_write << filename
end
end
case export_type
when "json"
to_write = {"documents": to_write}
file.write to_write.to_json
file.close
when "zip"
Zip::File.open(file.path, Zip::File::CREATE) do |zipfile|
to_write.each do |filename|
zipfile.add filename, "/tmp/#{filename}" if filename
end
end
to_write.each do |filename|
File.delete("/tmp/#{filename}") if filename
end
end
content = "<p>Your dataset is ready. <a target=\"_blank\" href=\"/send?filename=#{File.basename(file.path)}\">Click here</a> to download it.</p>"
def perform(user_id, dataset_id, export_type, time)
dataset = Dataset.find(dataset_id)
file = Tempfile.new(["export_#{dataset.title.parameterize(separator: "_")}_", ".#{export_type}"], "tmp")
to_write = []
named_entities = dataset.named_entities
named_entities = named_entities.values.map { |h| h.values }.flatten
documents = dataset.fetch_paginated_documents(1, 100, "default", "asc", "all", recursive = true)
numDocuments = documents[:docs].size
documents[:docs].map.with_index do |doc, idx|
increase = (100 * (idx + 1) / numDocuments).to_i - (100 * idx / numDocuments).to_i
if increase > 0
completion = (100 * (idx + 1) / numDocuments).to_i
ActionCable.server.broadcast("notifications.#{user_id}", {
type: "notify",
html: ApplicationController.render(partial: "shared/notification", locals: {notif_title: dataset.title, notif_content: content})
type: "completion_rate",
dataset_id: dataset_id,
completion: completion,
time: time,
})
end
case export_type
when "json"
lang = doc.language
thumb = doc.thumbnail_url
if doc.is_a?(CompoundArticle)
doc_type = "compound"
entities = named_entities.select { |ne| doc.parts.include? ne["article_id_ssi"] }
elsif doc.is_a?(Article)
doc_type = "article"
entities = named_entities.select { |ne| doc.id == ne["article_id_ssi"] }
elsif doc.is_a?(Issue)
doc_type = "issue"
entities = named_entities.select { |ne| doc.id == ne["issue_id_ssi"] }
end
entities = entities.map do |ne|
{
mention: ne["mention_ssi"],
indexStart: ne["article_index_start_isi"],
indexEnd: ne["article_index_end_isi"],
stance: if ne["stance_fsi"] == 0
"neutral"
else
ne["stance_fsi"] > 0 ? "positive" : "negative"
end,
linked_entity_url: ne["linked_entity_ssi"] == "" || ne["linked_entity_ssi"].nil? ? nil : "https://www.wikidata.org/wiki/#{ne["linked_entity_ssi"].split("_")[-1]}",
}
end
unless under_copyright(lang, doc.date_created, User.find(user_id))
to_write << { id: doc.id,
type: doc_type,
language: lang,
date: doc.date_created,
newspaper_id: doc.newspaper,
iiif_url: thumb,
text: doc.all_text,
named_entities: entities }
end
when "zip"
filename = "#{doc.date_created}_#{doc.is_a?(CompoundArticle) ? "compound_#{doc.title}" : doc.id}.txt"
file_content = doc.all_text
File.open("/tmp/#{filename}", "w") do |f|
f.write file_content
end
to_write << filename
end
end
def under_copyright(lang, date, user)
nlf_doc = ["fi", "se"].include? lang
nlf_under_copyright = Date.parse("1910-12-31T00:00:00Z") <= Date.parse(date)
nlf_doc and nlf_under_copyright and !user.groups.include? "researcher"
case export_type
when "json"
to_write = { "documents": to_write }
file.write to_write.to_json
file.close
when "zip"
Zip::File.open(file.path, Zip::File::CREATE) do |zipfile|
to_write.each do |filename|
zipfile.add filename, "/tmp/#{filename}" if filename
end
end
to_write.each do |filename|
File.delete("/tmp/#{filename}") if filename
end
end
end
\ No newline at end of file
content = "<p>Your dataset is ready. <a target=\"_blank\" href=\"/en/tool/annolyzer/send?filename=#{File.basename(file.path)}\">Click here</a> to download it.</p>"
ActionCable.server.broadcast("notifications.#{user_id}", {
type: "notify",
html: ApplicationController.render(partial: "shared/notification", locals: { notif_title: dataset.title, notif_content: content, notif_autohide: "false" }),
})
end
# Tells whether a document has to be left out of an export for copyright
# reasons.
#
# A document is restricted only when all three conditions hold: its language
# code is "fi" or "se", its creation date is on or after 1910-12-31, and the
# requesting user is not in the "researcher" group.
#
# @param lang [String] language code of the document
# @param date [String] creation date, in any format Date.parse accepts
# @param user [#groups] requesting user; `groups` must respond to `include?`
# @return [Boolean] true when the document must be excluded from the export
# @raise [ArgumentError, TypeError] when a "fi"/"se" document carries a date
#   that Date.parse cannot handle (malformed string or nil)
def under_copyright(lang, date, user)
  # Decide on the language first: documents in any other language are never
  # restricted, so a missing or malformed date on them must not abort the
  # export by raising from Date.parse.
  # NOTE(review): "se" is what the data is matched against here; the ISO 639-1
  # code for Swedish is "sv" -- confirm which code the index actually stores.
  return false unless %w[fi se].include?(lang)

  cutoff = Date.parse("1910-12-31T00:00:00Z")
  Date.parse(date) >= cutoff && !user.groups.include?("researcher")
end
end
class NgramsWorker
include Sidekiq::Worker
include Sidekiq::Worker
def perform(tool_id, user_id, experiment_id, tool_type, tool_parameters, continue=false)
tool = Tool.find(tool_id)
tool.status = "running"
tool.save!
ActionCable.server.broadcast("notifications.#{user_id}", {
type: "refresh_display",
html: ApplicationController.render(partial: "experiment/tree", locals: {experiment: Experiment.find(tool.experiment.id)}),
message: 'Starting job...' })
parent_output = Tool.find(tool.parent_id).results
docs = parent_output["docs"].map{ |doc| doc['text'] }
n = tool_parameters.select{|t| t['name'] == 'n'}[0]['value'].to_i
min_freq = tool_parameters.select{|t| t['name'] == 'minimum_frequency'}[0]['value'].to_i
ngrams = find_ngrams(tool_id, experiment_id, user_id, docs, n, min_freq)
tool.results = {type:"ngrams", ngrams: ngrams}
tool.status = "finished"
tool.save!
experiment = Experiment.find(tool.experiment.id)
out = {
type: "refresh_display",
html: ApplicationController.render(partial: "experiment/tree", locals: {experiment: experiment}),
message: 'Done.'
}
ActionCable.server.broadcast("notifications.#{user_id}", out)
if continue
experiment.continue_from(tool_id)
end
if experiment.finished?
out = {
type: "experiment_finished",
message: 'Experiment has finished running.'
}
ActionCable.server.broadcast("notifications.#{user_id}", out)
end
# Runs the n-gram computation for one tool and reports progress on the
# "notifications.<user_id>" channel.
#
# Steps, in order: mark the tool "running" and save it, broadcast a refreshed
# experiment tree, read the documents from the parent tool's results, compute
# the n-grams via find_ngrams, store them on the tool as "finished", broadcast
# the tree again, optionally continue the experiment, and finally announce
# completion when the experiment reports it has finished.
#
# @param tool_id [Object] id used to look up the Tool
# @param user_id [Object] id interpolated into the notification channel name
# @param experiment_id [Object] forwarded to find_ngrams
# @param tool_type [Object] not used by this method
# @param tool_parameters [Array<Hash>] entries with "name" and "value" keys;
#   the entries named "n" and "minimum_frequency" are read
# @param continue [Boolean] when truthy, calls experiment.continue_from(tool_id)
def perform(tool_id, user_id, experiment_id, tool_type, tool_parameters, continue = false)
  channel = "notifications.#{user_id}"
  # Reads the integer value of the first parameter entry with the given name.
  int_param = lambda do |name|
    tool_parameters.select { |param| param["name"] == name }.first["value"].to_i
  end

  tool = Tool.find(tool_id)
  tool.status = "running"
  tool.save!

  starting_html = ApplicationController.render(
    partial: "experiment/tree",
    locals: { experiment: Experiment.find(tool.experiment.id) }
  )
  ActionCable.server.broadcast(channel, { type: "refresh_display", html: starting_html, message: "Starting job..." })

  # Input texts come from the results stored on the parent tool.
  texts = Tool.find(tool.parent_id).results["docs"].map { |doc| doc["text"] }
  n = int_param.call("n")
  min_freq = int_param.call("minimum_frequency")

  tool.results = { type: "ngrams", ngrams: find_ngrams(tool_id, experiment_id, user_id, texts, n, min_freq) }
  tool.status = "finished"
  tool.save!

  # Reload the experiment so the rendered tree reflects the saved tool.
  experiment = Experiment.find(tool.experiment.id)
  done_html = ApplicationController.render(partial: "experiment/tree", locals: { experiment: experiment })
  ActionCable.server.broadcast(channel, { type: "refresh_display", html: done_html, message: "Done." })

  experiment.continue_from(tool_id) if continue
  return unless experiment.finished?

  ActionCable.server.broadcast(channel, { type: "experiment_finished", message: "Experiment has finished running." })
end
def find_ngrams(tool_id, experiment_id, user_id, documents, n, minimum_frequency)
total = {}
documents.each_with_index do |document, idx|
out = {
type: "completion_rate",
tool_id: tool_id,
experiment_id: experiment_id,
completion: ((idx/(documents.size).to_f)*100).to_i
}
ActionCable.server.broadcast("notifications.#{user_id}", out) if idx%20 == 0
ngrams = document.split.each_cons(n).to_a
ngrams.reject! { |w1, w2| w1 !~ /^\w+/ || w2 !~ /^\w+/ }
ngrams.map!{ |ngram| ngram.join(' ') }
total.merge!( ngrams.each_with_object(Hash.new(0)) do |word, obj|
obj[word.downcase] += 1
end)
end
total.sort_by { |k, v| -v }.reject { |k, v| v < minimum_frequency }
# Counts word n-grams across a list of documents and returns the frequent ones.
#
# Each document is split on whitespace; every run of `n` consecutive tokens
# becomes one n-gram (joined with a single space and downcased). N-grams that
# contain a token not starting with a word character are ignored. Counts are
# summed over all documents.
#
# While iterating, a "completion_rate" message is broadcast on the
# "notifications.<user_id>" channel for every 20th document (starting with
# the first one).
#
# @param tool_id [Object] echoed in the progress message
# @param experiment_id [Object] echoed in the progress message
# @param user_id [Object] interpolated into the notification channel name
# @param documents [Array<String>] texts to analyse
# @param n [Integer] number of tokens per n-gram (each_cons raises ArgumentError for n <= 0)
# @param minimum_frequency [Integer] n-grams seen fewer times than this are dropped
# @return [Array<Array(String, Integer)>] [ngram, count] pairs, highest count first
def find_ngrams(tool_id, experiment_id, user_id, documents, n, minimum_frequency)
  counts = Hash.new(0)
  doc_total = documents.size.to_f

  documents.each_with_index do |document, idx|
    # Build and send the progress payload only when it is actually broadcast.
    if (idx % 20).zero?
      ActionCable.server.broadcast("notifications.#{user_id}", {
        type: "completion_rate",
        tool_id: tool_id,
        experiment_id: experiment_id,
        completion: ((idx / doc_total) * 100).to_i,
      })
    end

    document.split.each_cons(n) do |words|
      # Check every token of the n-gram. Looking only at the first two would
      # reject all unigrams (the second token is nil) and would let
      # punctuation through in positions three and up.
      # NOTE(review): \w is ASCII-only in Ruby, so tokens starting with an
      # accented letter are skipped -- confirm this is intended for this corpus.
      next unless words.all? { |word| word.match?(/^\w+/) }

      # Accumulate straight into the shared counter so occurrences from
      # different documents add up instead of overwriting one another.
      counts[words.join(" ").downcase] += 1
    end
  end

  counts.reject { |_ngram, count| count < minimum_frequency }
        .sort_by { |_ngram, count| -count }
end
end