Source code for hepdata.modules.records.api

# -*- coding: utf-8 -*-
#
# This file is part of HEPData.
# Copyright (C) 2016 CERN.
#
# HEPData is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# HEPData is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with HEPData; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

"""API for HEPData-Records."""
import os
from collections import OrderedDict
from functools import wraps

import time
from celery import shared_task
from flask import redirect, request, render_template, jsonify, current_app, Response, abort, flash
from flask_login import current_user
from invenio_accounts.models import User
from invenio_db import db
from sqlalchemy import and_
from sqlalchemy.orm.exc import NoResultFound
from werkzeug.utils import secure_filename

from hepdata.modules.converter import convert_oldhepdata_to_yaml
from hepdata.modules.email.api import send_cookie_email
from hepdata.modules.email.utils import create_send_email_task
from hepdata.modules.permissions.api import user_allowed_to_perform_action
from hepdata.modules.permissions.models import SubmissionParticipant
from hepdata.modules.records.subscribers.api import is_current_user_subscribed_to_record
from hepdata.modules.records.utils.common import decode_string, find_file_in_directory, allowed_file, \
    remove_file_extension, truncate_string, get_record_contents, get_record_by_id
from hepdata.modules.records.utils.data_processing_utils import process_ctx
from hepdata.modules.records.utils.data_files import get_data_path_for_record, cleanup_old_files
from hepdata.modules.records.utils.submission import process_submission_directory, create_data_review, cleanup_submission
from hepdata.modules.submission.api import get_latest_hepsubmission, get_submission_participants_for_record
from hepdata.modules.records.utils.users import get_coordinators_in_system, has_role
from hepdata.modules.records.utils.workflow import update_action_for_submission_participant
from hepdata.modules.records.utils.yaml_utils import split_files
from hepdata.modules.stats.views import increment, get_count
from hepdata.modules.submission.models import RecordVersionCommitMessage, DataSubmission, HEPSubmission, DataReview
from hepdata.utils.file_extractor import extract
from hepdata.utils.miscellaneous import sanitize_html
from hepdata.utils.users import get_user_from_id
from bs4 import BeautifulSoup
from hepdata_converter_ws_client import Error

import tempfile
import shutil

import logging
logging.basicConfig()
log = logging.getLogger(__name__)

# Human-readable labels for data review status codes. Looked up by the
# ``status`` of a data review when the "review_status" entry of a table's
# metadata is filled in.
RECORD_PLAIN_TEXT = {
    "passed": "passed review",
    "attention": "attention required",
    "todo": "to be reviewed"
}


def returns_json(f):
    """
    Decorator that serves the wrapped callable's result as JSON.

    The value returned by ``f`` is used as the body of a flask ``Response``
    with content type ``application/json; charset=utf-8``.

    :param f: callable producing the response body.
    :return: the wrapping function (metadata of ``f`` is kept via ``wraps``).
    """
    json_content_type = 'application/json; charset=utf-8'

    @wraps(f)
    def decorated_function(*args, **kwargs):
        return Response(f(*args, **kwargs), content_type=json_content_type)

    return decorated_function
def format_submission(recid, record, version, version_count, hepdata_submission,
                      data_table=None):
    """
    Performs all the processing of the record to be displayed.

    Note that ``record`` is stored in the context as ``ctx['record']`` (the
    same object, not a copy), so the fields added here (collaborations,
    last_updated, hepdata_doi, data_abstract, journal_info) and the removal
    of 'authors' are visible on the passed-in dict as well.

    :param recid: id of the publication record.
    :param record: dict with the record contents, or None.
    :param version: version to show; -1 means "use version_count".
    :param version_count: number of versions available to the user.
    :param hepdata_submission: submission object for the record, or None.
    :param data_table: optional table name, passed through to format_tables.
    :return: the context dict; empty when hepdata_submission is None.
    """
    ctx = {}
    if hepdata_submission is None:
        return ctx

    ctx['site_url'] = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
    ctx['record'] = record
    ctx["version_count"] = version_count

    # Compare by value: identity comparison against an int literal only works
    # by accident of integer caching and raises a SyntaxWarning.
    if version != -1:
        ctx["version"] = version
    else:
        # we get the latest version by default
        ctx["version"] = version_count

    if record is not None:
        if "collaborations" in record and not isinstance(record['collaborations'], list):
            collaborations = [x.strip() for x in record["collaborations"].split(",")]
            ctx['record']['collaborations'] = collaborations

        authors = record.get('authors', None)

        create_breadcrumb_text(authors, ctx, record)
        get_commit_message(ctx, recid)

        if authors:
            truncate_author_list(record)

        determine_user_privileges(recid, ctx)
    else:
        ctx['record'] = {}
        determine_user_privileges(recid, ctx)
        # Without record metadata the upload widget is always offered.
        ctx['show_upload_widget'] = True
        ctx['show_review_widget'] = False

    ctx['participant_count'] = SubmissionParticipant.query \
        .filter_by(publication_recid=recid, status="primary") \
        .filter(SubmissionParticipant.role.in_(["reviewer", "uploader"])) \
        .count()
    ctx['reviewers_notified'] = hepdata_submission.reviewers_notified

    ctx['record']['last_updated'] = hepdata_submission.last_updated
    ctx['record']['hepdata_doi'] = "{0}".format(hepdata_submission.doi)
    if ctx['version'] > 1:
        ctx['record']['hepdata_doi'] += ".v{0}".format(ctx['version'])

    ctx['recid'] = recid
    ctx["status"] = hepdata_submission.overall_status
    ctx['record']['data_abstract'] = sanitize_html(decode_string(hepdata_submission.data_abstract))

    extract_journal_info(record)

    if hepdata_submission.overall_status != 'finished' and ctx["version_count"] > 0:
        if not (ctx['show_review_widget']
                or ctx['show_upload_widget']
                or ctx['is_submission_coordinator_or_admin']):
            # we show the latest approved version.
            ctx["version"] -= 1
            ctx["version_count"] -= 1

    ctx['additional_resources'] = submission_has_resources(hepdata_submission)

    # query for a related data submission
    data_record_query = DataSubmission.query.filter_by(
        publication_recid=recid,
        version=ctx["version"]).order_by(DataSubmission.id.asc())

    format_tables(ctx, data_record_query, data_table, recid)

    ctx['access_count'] = get_count(recid)
    ctx['mode'] = 'record'
    ctx['coordinator'] = hepdata_submission.coordinator
    ctx['coordinators'] = get_coordinators_in_system()
    ctx['record'].pop('authors', None)

    return ctx
def format_tables(ctx, data_record_query, data_table, recid):
    """
    Finds all the tables related to a submission and formats them
    for display in the UI or as JSON.

    Fills in ``ctx['watched']``, ``ctx['data_tables']``,
    ``ctx['table_id_to_show']`` and ``ctx['table_name_to_show']``. A non-empty
    ``table`` request argument overrides the table to show when it matches
    a table's name or, failing that, its processed name.

    :param ctx: context dict being built; must already contain "version".
    :param data_record_query: query yielding the data tables of the record.
    :param data_table: optional name of the table to focus on.
    :param recid: id of the publication record.
    :return: None
    """
    data_table_metadata, first_data_id = process_data_tables(
        ctx, data_record_query, -1, data_table)

    assign_or_create_review_status(data_table_metadata, recid, ctx["version"])

    ctx['watched'] = is_current_user_subscribed_to_record(recid)
    ctx['data_tables'] = list(data_table_metadata.values())
    ctx['table_id_to_show'] = first_data_id
    ctx['table_name_to_show'] = ''

    tables = ctx['data_tables']

    default_matches = [table for table in tables if table['id'] == first_data_id]
    if default_matches:
        ctx['table_name_to_show'] = default_matches[0]['name']

    if 'table' in request.args and request.args['table']:
        table_from_args = request.args['table']

        # Check for table name in list of data tables.
        requested_matches = [table for table in tables
                             if table['name'] == table_from_args]
        if not requested_matches:
            # Check for processed table name in list of data tables.
            requested_matches = [table for table in tables
                                 if table['processed_name'] == table_from_args]

        if requested_matches:
            # Set table ID and name to the first matching table.
            chosen = requested_matches[0]
            ctx['table_id_to_show'] = chosen['id']
            ctx['table_name_to_show'] = chosen['name']
def get_commit_message(ctx, recid):
    """
    Adds the commit message for the current version to the context, if present.

    When a RecordVersionCommitMessage exists for ``ctx["version"]`` and
    ``recid``, it is stored under ``ctx["revision_message"]`` as a dict with
    'version' and 'message' keys; otherwise ``ctx`` is left untouched.

    :param ctx: context dict; must contain "version".
    :param recid: id of the publication record.
    :return: None
    """
    # A single first() replaces the former count() + one() pair: one query
    # instead of two, and no MultipleResultsFound error should more than one
    # message have been stored for the same version.
    commit_message = RecordVersionCommitMessage.query \
        .filter_by(version=ctx["version"], recid=recid) \
        .first()

    if commit_message is not None:
        ctx["revision_message"] = {
            'version': commit_message.version,
            'message': commit_message.message}
def create_breadcrumb_text(authors, ctx, record):
    """
    Creates the breadcrumb text for a submission.

    ``ctx['breadcrumb_text']`` is set to the first author's full name, with
    " et al." appended when the record lists more than one author. Nothing
    is set when the record has no first author with a non-None full name.

    :param authors: the record's author list, or None.
    :param ctx: context dict to update.
    :param record: dict with the record contents.
    """
    if "first_author" not in record:
        return

    first_author = record["first_author"]
    if 'full_name' not in first_author or first_author["full_name"] is None:
        return

    ctx['breadcrumb_text'] = first_author["full_name"]

    if authors is None:
        return
    if len(record['authors']) > 1:
        ctx['breadcrumb_text'] += " et al."
def submission_has_resources(hepsubmission):
    """
    Tells whether any resources are attached to the submission.

    :param hepsubmission: HEPSubmission object
    :return: bool, True when ``hepsubmission.resources`` is non-empty
    """
    resource_count = len(hepsubmission.resources)
    return resource_count > 0
def extract_journal_info(record):
    """
    Sets ``record['journal_info']`` for theses and conference papers.

    For a thesis the value is "<dissertation type>, <institution>" (just the
    dissertation type when no institution is given), falling back to
    "PhD Thesis" when the record carries no dissertation type. For a
    conference paper the value is "Conference Paper". Other records, and
    empty/None records, are left untouched.

    :param record: dict with the record contents, or None.
    :return: None
    """
    if not record or 'type' not in record:
        return

    if 'thesis' in record['type']:
        # A thesis record is not guaranteed to carry a 'dissertation' entry
        # (or an institution within it), so neither is indexed directly.
        dissertation = record.get('dissertation') or {}
        thesis_type = dissertation.get('type')
        if thesis_type:
            institution = dissertation.get('institution')
            if institution:
                record['journal_info'] = thesis_type + ", " + institution
            else:
                record['journal_info'] = thesis_type
        else:
            record['journal_info'] = "PhD Thesis"
    elif 'conference paper' in record['type']:
        record['journal_info'] = "Conference Paper"
def render_record(recid, record, version, output_format, light_mode=False):
    """
    Builds the response for a request to view or download a record.

    Depending on the state of the record and on ``output_format`` this renders
    an HTML template, returns JSON, redirects to a download URL, redirects to
    the login page, or aborts with 403/404.

    :param recid: id of the requested record.
    :param record: dict with the record contents, or None.
    :param version: requested version; -1 selects the latest allowed version.
    :param output_format: requested format, e.g. 'html', 'json' or 'yoda';
        anything other than 'html'/'json' leads to a download redirect.
    :param light_mode: passed on to process_ctx for JSON output.
    :return: a flask response.
    """

    def escape_table_name(table_name):
        # Escape percent signs and backslashes for use in a download URL.
        return table_name.replace('%', '%25').replace('\\', '%5C')

    def rivet_requested():
        return output_format == 'yoda' and 'rivet' in request.args

    # Count number of all versions and number of finished versions of a publication record.
    version_count_all = HEPSubmission.query.filter(
        HEPSubmission.publication_recid == recid,
        and_(HEPSubmission.overall_status != 'sandbox',
             HEPSubmission.overall_status != 'sandbox_processing')).count()
    version_count_finished = HEPSubmission.query.filter_by(
        publication_recid=recid, overall_status='finished').count()

    # Number of versions that a user is allowed to access based on their permissions.
    if user_allowed_to_perform_action(recid):
        version_count = version_count_all
    else:
        version_count = version_count_finished

    # If version not given explicitly, take to be latest allowed version (or 1 if there are no allowed versions).
    if version == -1:
        version = version_count or 1

    # Check for a user trying to access a version of a publication record where they don't have permissions.
    if version_count < version_count_all and version == version_count_all:
        # Return a 403 error for authenticated users, otherwise prompt the user to login then redirect.
        if current_user.is_authenticated:
            abort(403)
        redirect_url_after_login = '%2Frecord%2F{0}%3Fversion%3D{1}%26format%3D{2}'.format(
            recid, version, output_format)
        if 'table' in request.args:
            redirect_url_after_login += '%26table%3D{0}'.format(request.args['table'])
        return redirect('/login/?next={0}'.format(redirect_url_after_login))

    hepdata_submission = get_latest_hepsubmission(publication_recid=recid, version=version)

    if hepdata_submission is not None:
        status = hepdata_submission.overall_status

        if status == 'processing':
            ctx = {'recid': recid}
            determine_user_privileges(recid, ctx)
            return render_template('hepdata_records/publication_processing.html', ctx=ctx)

        if status.startswith('sandbox'):
            abort(404)

        ctx = format_submission(recid, record, version, version_count, hepdata_submission)
        increment(recid)

        if output_format == 'html':
            return render_template('hepdata_records/publication_record.html', ctx=ctx)

        if 'table' not in request.args:
            if output_format == 'json':
                return jsonify(process_ctx(ctx, light_mode))
            if rivet_requested():
                return redirect('/download/submission/{0}/{1}/{2}/{3}'.format(
                    recid, version, output_format, request.args['rivet']))
            return redirect('/download/submission/{0}/{1}/{2}'.format(
                recid, version, output_format))

        if hepdata_submission.inspire_id:
            file_identifier = 'ins{}'.format(hepdata_submission.inspire_id)
        else:
            file_identifier = recid

        if rivet_requested():
            return redirect('/download/table/{0}/{1}/{2}/{3}/{4}'.format(
                file_identifier, escape_table_name(request.args['table']),
                version, output_format, request.args['rivet']))
        return redirect('/download/table/{0}/{1}/{2}/{3}'.format(
            file_identifier, escape_table_name(request.args['table']),
            version, output_format))

    if record is None:
        abort(404)

    # this happens when we access an id of a data record
    # in which case, we find the related publication, and
    # make the front end focus on the relevant data table.
    try:
        publication_recid = int(record['related_publication'])
        publication_record = get_record_contents(publication_recid)

        hepdata_submission = get_latest_hepsubmission(publication_recid=publication_recid)

        ctx = format_submission(publication_recid, publication_record,
                                hepdata_submission.version, 1, hepdata_submission,
                                data_table=record['title'])
        ctx['related_publication_id'] = publication_recid
        ctx['table_name'] = record['title']

        if output_format == 'html':
            return render_template('hepdata_records/data_record.html', ctx=ctx)
        if rivet_requested():
            return redirect('/download/table/{0}/{1}/{2}/{3}/{4}'.format(
                publication_recid, escape_table_name(ctx['table_name']),
                hepdata_submission.version, output_format, request.args['rivet']))
        return redirect('/download/table/{0}/{1}/{2}/{3}'.format(
            publication_recid, escape_table_name(ctx['table_name']),
            hepdata_submission.version, output_format))
    except Exception:
        # Any failure while resolving the related publication is reported
        # to the client as "not found".
        abort(404)
def has_upload_permissions(recid, user, is_sandbox=False):
    """
    Tells whether a user may upload files for a record.

    Admins always may. For a sandbox record the user must be the coordinator
    of the latest sandbox submission; otherwise the user must be a primary
    uploader of the record.

    :param recid: id of the publication record.
    :param user: user object; ``user.id`` is used for the lookups.
    :param is_sandbox: whether the record is a sandbox record.
    :return: bool
    """
    if has_role(user, 'admin'):
        return True

    if is_sandbox:
        hepsubmission_record = get_latest_hepsubmission(publication_recid=recid, overall_status='sandbox')
        return hepsubmission_record is not None and hepsubmission_record.coordinator == user.id

    participant = SubmissionParticipant.query.filter_by(
        user_account=user.id, role='uploader', publication_recid=recid, status='primary').first()

    # Always answer with a bool; this used to fall through and return None
    # when the user was not an uploader.
    return participant is not None
def has_coordinator_permissions(recid, user, is_sandbox=False):
    """
    Tells whether a user has coordinator rights for a record.

    Admins always do; other users must be the coordinator of at least one
    submission of the record.

    :param recid: id of the publication record.
    :param user: user object; ``user.get_id()`` is matched against the
        submission coordinator.
    :param is_sandbox: accepted but not used by this function.
    :return: bool
    """
    if has_role(user, 'admin'):
        return True

    coordinated_submissions = HEPSubmission.query.filter_by(
        publication_recid=recid,
        coordinator=user.get_id())
    return coordinated_submissions.first() is not None
def create_new_version(recid, user, notify_uploader=True, uploader_message=None):
    """
    Opens a new version of a finished record.

    :param recid: id of the publication record.
    :param user: accepted but not used by this function.
    :param notify_uploader: whether to email the record's primary uploaders.
    :param uploader_message: optional message included in that email.
    :return: JSON response with 'success' and 'version' when the new version
        was created, or a ('message' JSON, 400) pair when the latest version
        is not finished.
    """
    hepsubmission = get_latest_hepsubmission(publication_recid=recid)

    if hepsubmission.overall_status != 'finished':
        return jsonify({"message": f"Rec id {recid} is not finished so cannot create a new version"}), 400

    # Reopen the submission to allow for revisions,
    # by creating a new HEPSubmission object.
    next_version = hepsubmission.version + 1
    _rev_hepsubmission = HEPSubmission(publication_recid=recid,
                                       overall_status='todo',
                                       inspire_id=hepsubmission.inspire_id,
                                       coordinator=hepsubmission.coordinator,
                                       version=next_version)
    db.session.add(_rev_hepsubmission)
    db.session.commit()

    if notify_uploader:
        primary_uploaders = SubmissionParticipant.query.filter_by(
            role='uploader', publication_recid=recid, status='primary'
        )
        record_information = get_record_by_id(recid)
        for primary_uploader in primary_uploaders:
            send_cookie_email(primary_uploader,
                              record_information,
                              message=uploader_message,
                              version=_rev_hepsubmission.version)

    return jsonify({'success': True, 'version': _rev_hepsubmission.version})
def process_payload(recid, file, redirect_url, synchronous=False):
    """Process an uploaded file

    :param recid: int
        The id of the record to update
    :param file: file
        The file to process
    :param redirect_url: string
        Redirect URL to record, for use if the upload fails or in synchronous mode
    :param synchronous: bool
        Whether to process asynchronously via celery (default) or immediately (only recommended for tests)
    :return: JSONResponse either containing 'url' (for success cases) or
             'message' (for error cases: a 400 error for a missing or
             disallowed file, a 413 error for a file that is too large).
    """
    if not (file and allowed_file(file.filename)):
        return jsonify({"message": "You must upload a .zip, .tar, .tar.gz or .tgz file" +
                        " (or a .oldhepdata or single .yaml or .yaml.gz file)."}), 400

    file_path = save_zip_file(file, recid)
    file_size = os.path.getsize(file_path)
    upload_max_size = current_app.config.get('UPLOAD_MAX_SIZE', 52000000)
    if file_size > upload_max_size:
        # The upload is rejected, so do not leave the oversized file behind.
        try:
            os.remove(file_path)
        except OSError as e:
            log.warning("Could not remove oversized upload %s: %s", file_path, e)
        return jsonify({"message": "{} too large ({} bytes > {} bytes)".format(
            file.filename, file_size, upload_max_size)}), 413

    hepsubmission = get_latest_hepsubmission(publication_recid=recid)

    if hepsubmission.overall_status == 'finished':
        # If it is finished and we receive an update,
        # then we need to reopen the submission to allow for revisions,
        # by creating a new HEPSubmission object.
        _rev_hepsubmission = HEPSubmission(publication_recid=recid,
                                           overall_status='todo',
                                           inspire_id=hepsubmission.inspire_id,
                                           coordinator=hepsubmission.coordinator,
                                           version=hepsubmission.version + 1)
        db.session.add(_rev_hepsubmission)
        hepsubmission = _rev_hepsubmission

    # Mark the submission as being processed; the previous status is handed
    # to process_saved_file, which restores it when it is done.
    previous_status = hepsubmission.overall_status
    hepsubmission.overall_status = 'sandbox_processing' if previous_status == 'sandbox' else 'processing'
    db.session.add(hepsubmission)
    db.session.commit()

    if synchronous:
        process_saved_file(file_path, recid, current_user.get_id(), redirect_url, previous_status)
    else:
        process_saved_file.delay(file_path, recid, current_user.get_id(), redirect_url, previous_status)
        flash('File saved. You will receive an email when the file has been processed.', 'info')

    return jsonify({'url': redirect_url.format(recid)})
@shared_task
def process_saved_file(file_path, recid, userid, redirect_url, previous_status):
    """
    Task that processes a previously saved upload and emails the outcome.

    The archive at ``file_path`` is processed with process_zip_archive. On
    validation errors the submission's tables are cleaned up and an
    "upload failed" email is sent; otherwise the uploader's action is updated
    and an "upload succeeded" email is sent. In both cases the submission's
    status is then reset to ``previous_status`` and old upload files are
    cleaned up.

    :param file_path: path of the saved upload.
    :param recid: id of the publication record.
    :param userid: id of the uploading user (recipient of the email).
    :param redirect_url: URL template, formatted with ``recid``, used as the
        link in the emails.
    :param previous_status: status to restore on the submission afterwards.
    :return: None
    """
    try:
        hepsubmission = get_latest_hepsubmission(publication_recid=recid)
        # Only act on submissions that were put into a processing state
        # beforehand; anything else is logged and left alone.
        if hepsubmission.overall_status != 'processing' and hepsubmission.overall_status != 'sandbox_processing':
            log.error('Record {} is not in a processing state.'.format(recid))
            return

        # A truthy result holds the errors found while processing the upload.
        errors = process_zip_archive(file_path, recid)

        uploader = User.query.get(userid)
        site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')

        # Address the email by the participant's full name when the user is
        # a registered uploader for this record, else by email address.
        submission_participant = SubmissionParticipant.query.filter_by(
            publication_recid=recid, user_account=userid, role='uploader').first()
        if submission_participant:
            full_name = submission_participant.full_name
        else:
            full_name = uploader.email

        if errors:
            cleanup_submission(recid, hepsubmission.version, [])  # delete all tables if errors
            message_body = render_template('hepdata_theme/email/upload_errors.html',
                                           name=full_name,
                                           article=recid,
                                           redirect_url=redirect_url.format(recid),
                                           errors=errors,
                                           site_url=site_url)

            create_send_email_task(uploader.email,
                                   '[HEPData] Submission {0} upload failed'.format(recid),
                                   message_body)
        else:
            update_action_for_submission_participant(recid, userid, 'uploader')
            message_body = render_template('hepdata_theme/email/upload_complete.html',
                                           name=full_name,
                                           article=recid,
                                           link=redirect_url.format(recid),
                                           site_url=site_url)

            create_send_email_task(uploader.email,
                                   '[HEPData] Submission {0} upload succeeded'.format(recid),
                                   message_body)

        # Reset the status of the submission back to the previous value.
        hepsubmission.overall_status = previous_status
        db.session.add(hepsubmission)
        db.session.commit()

        # Delete any previous upload folders relating to non-final versions
        # of this hepsubmission
        cleanup_old_files(hepsubmission)

    except Exception as e:
        # Reset the status and send error emails, unless we're working
        # asynchronously and celery is about to retry
        # NOTE(review): no retry settings are visible on the decorator here;
        # presumably request.id is empty for a direct (synchronous) call --
        # confirm against the celery configuration.
        if not process_saved_file.request.id \
                or process_saved_file.request.retries >= process_saved_file.max_retries:
            try:
                # If the failure happened before hepsubmission was assigned,
                # this line itself raises and is reported by the inner handler.
                cleanup_submission(recid, hepsubmission.version, [])
                errors = {
                    "Unexpected error": [{
                        "level": "error",
                        "message": "An unexpected error occurred: {}".format(e)
                    }]
                }
                uploader = User.query.get(userid)
                site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
                message_body = render_template('hepdata_theme/email/upload_errors.html',
                                               name=uploader.email,
                                               article=recid,
                                               redirect_url=redirect_url.format(recid),
                                               errors=errors,
                                               site_url=site_url)

                create_send_email_task(uploader.email,
                                       '[HEPData] Submission {0} upload failed'.format(recid),
                                       message_body)
                log.error("Final attempt of process_saved_file for recid %s failed. Resetting to previous status." % recid)

                # Reset the status of the submission back to the previous value.
                hepsubmission.overall_status = previous_status
                db.session.add(hepsubmission)
                db.session.commit()

            except Exception as ex:
                # Failures during recovery are only logged, never re-raised.
                log.error("Exception while cleaning up: %s" % ex)

        else:
            log.debug("Celery will retry task, attempt %s" % process_saved_file.request.retries)
            raise e
def save_zip_file(file, id):
    """
    Saves an uploaded file into a new, timestamped data directory.

    The target directory comes from get_data_path_for_record for the record
    id and the current time (in whole seconds); files ending in
    '.oldhepdata' go into an 'oldhepdata' subdirectory of it.

    :param file: uploaded file object providing ``filename`` and ``save()``.
    :param id: id of the record (name kept for interface compatibility even
        though it shadows the builtin).
    :return: path of the saved file.
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = get_data_path_for_record(str(id), time_stamp)

    if filename.endswith('.oldhepdata'):
        file_save_directory = os.path.join(file_save_directory, 'oldhepdata')

    # exist_ok avoids the race between checking for the directory and
    # creating it when two uploads arrive within the same second.
    os.makedirs(file_save_directory, exist_ok=True)

    file_path = os.path.join(file_save_directory, filename)

    print('Saving file to {}'.format(file_path))
    file.save(file_path)
    return file_path
def process_zip_archive(file_path, id, old_submission_schema=False, old_data_schema=False):
    """
    Unpacks an uploaded file and processes the submission it contains.

    Handles gzipped single YAML files (decompressed, then processed
    recursively), single YAML files (split into a submission directory),
    '.oldhepdata' files (converted to YAML) and archives (extracted).

    :param file_path: path of the uploaded file.
    :param id: id of the record (name kept for interface compatibility).
    :param old_submission_schema: passed on to process_submission_directory.
    :param old_data_schema: passed on to process_submission_directory; forced
        to True for converted '.oldhepdata' files.
    :return: a dict of errors keyed by section name if unpacking fails,
        otherwise the result of process_submission_directory.
    """
    (file_save_directory, filename) = os.path.split(file_path)

    if not filename.endswith('.oldhepdata'):
        if filename.endswith('.yaml.gz'):
            print('Extracting: {} to {}'.format(file_path, file_path[:-3]))
            if not extract(file_path, file_path[:-3]):
                return {
                    "Archive file extractor": [{
                        "level": "error",
                        "message": "{} is not a valid .gz file.".format(file_path)
                    }]
                }
            # NOTE(review): old_data_schema is deliberately passed as False
            # here, as before, rather than forwarding the caller's value --
            # confirm this is intended.
            return process_zip_archive(file_path[:-3], id,
                                       old_submission_schema=old_submission_schema,
                                       old_data_schema=False)

        submission_path = os.path.join(file_save_directory, remove_file_extension(filename))
        # Created only once it is actually needed (the .yaml.gz branch above
        # never uses it) and removed again on every early error return below,
        # so that no temporary directories are left behind.
        submission_temp_path = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])

        if filename.endswith('.yaml'):
            # we split the singular yaml file and create a submission directory
            error, _ = split_files(file_path, submission_temp_path)
            if error:
                shutil.rmtree(submission_temp_path, ignore_errors=True)
                return {
                    "Single YAML file splitter": [{
                        "level": "error",
                        "message": str(error)
                    }]
                }
        else:
            # we are dealing with a zip, tar, etc. so we extract the contents
            try:
                unzipped_path = extract(file_path, submission_temp_path)
            except Exception:
                # Any extraction failure is reported as an invalid archive.
                unzipped_path = None

            if not unzipped_path:
                shutil.rmtree(submission_temp_path, ignore_errors=True)
                return {
                    "Archive file extractor": [{
                        "level": "error",
                        "message": "{} is not a valid zip or tar archive file.".format(file_path)
                    }]
                }

        # move_files removes submission_temp_path itself, whatever the outcome.
        copy_errors = move_files(submission_temp_path, submission_path)
        if copy_errors:
            return copy_errors

        submission_found = find_file_in_directory(submission_path, lambda x: x == "submission.yaml")

        if not submission_found:
            return {
                "Archive file extractor": [{
                    "level": "error",
                    "message": "No submission.yaml file has been found in the archive."
                }]
            }

        basepath, submission_file_path = submission_found

    else:
        # The file was saved in <record dir>/<timestamp>/oldhepdata, so the
        # parent directory's name is the timestamp.
        file_dir = os.path.dirname(file_save_directory)
        time_stamp = os.path.split(file_dir)[1]
        result = check_and_convert_from_oldhepdata(os.path.dirname(file_save_directory), id, time_stamp)

        # Check for errors
        if isinstance(result, dict):
            return result

        basepath, submission_file_path = result
        old_data_schema = True

    return process_submission_directory(basepath, submission_file_path, id,
                                        old_data_schema=old_data_schema,
                                        old_submission_schema=old_submission_schema)
def check_and_convert_from_oldhepdata(input_directory, id, timestamp):
    """
    Check if the input directory contains a .oldhepdata file
    and convert it to YAML if it happens.

    :param input_directory: directory searched for a '.oldhepdata' file.
    :param id: id of the record (name kept for interface compatibility).
    :param timestamp: timestamp used to build the output data path.
    :return: a dict of errors keyed by section name on failure, otherwise the
        result of find_file_in_directory for the converted "submission.yaml".
    """
    converted_path = get_data_path_for_record(str(id), timestamp, 'yaml')

    oldhepdata_found = find_file_in_directory(
        input_directory,
        lambda x: x.endswith('.oldhepdata'),
    )
    if not oldhepdata_found:
        return {
            "Converter": [{
                "level": "error",
                "message": "No file with .oldhepdata extension has been found."
            }]
        }

    converted_temp_dir = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])
    converted_temp_path = os.path.join(converted_temp_dir, 'yaml')

    try:
        try:
            successful = convert_oldhepdata_to_yaml(oldhepdata_found[1], converted_temp_path)
            if not successful:
                # Parse error message from title of HTML file, removing part of string after final "//".
                with open(converted_temp_path) as error_page:
                    soup = BeautifulSoup(error_page, "lxml")
                title = soup.title.string if soup.title else None
                if title:
                    errormsg = title.rsplit("//", 1)[0]
                else:
                    errormsg = "No error message was returned by the converter."
        except Error as error:  # hepdata_converter_ws_client.Error
            successful = False
            errormsg = str(error)

        if not successful:
            return {
                "Converter": [{
                    "level": "error",
                    "message": "The conversion from oldhepdata "
                               "to the YAML format has not succeeded. "
                               "Error message from converter follows:<br/><br/>" + errormsg
                }]
            }

        copy_errors = move_files(converted_temp_path, converted_path)
        if copy_errors:
            return copy_errors
    finally:
        # Always remove the temporary directory: move_files only removes the
        # 'yaml' path inside it, which used to leave the parent behind.
        shutil.rmtree(converted_temp_dir, ignore_errors=True)

    submission_found = find_file_in_directory(converted_path, lambda x: x == "submission.yaml")
    if not submission_found:
        # Report a regular error instead of handing back a value that the
        # caller cannot unpack into (basepath, submission_file_path).
        return {
            "Converter": [{
                "level": "error",
                "message": "No submission.yaml file has been found after conversion."
            }]
        }
    return submission_found
def move_files(submission_temp_path, submission_path):
    """
    Replaces ``submission_path`` with a copy of ``submission_temp_path``.

    Any existing destination is removed first, and the temporary source
    directory is always removed afterwards, whether or not copying worked.

    :param submission_temp_path: directory to copy from.
    :param submission_path: directory to copy to.
    :return: None on success, otherwise a dict with a list of error
        messages under "Exceptions when copying files".
    """
    temp_prefix = submission_temp_path + '/'
    final_prefix = submission_path + '/'

    print('Copying files from {} to {}'.format(submission_temp_path + '/.', submission_path))
    try:
        shutil.rmtree(submission_path, ignore_errors=True)
        shutil.copytree(submission_temp_path, submission_path, symlinks=False)
    except shutil.Error as copy_error:
        # Remove full paths from filenames before sending error message to user
        failures = [
            {
                "level": "error",
                "message": 'Invalid file {}: {}'.format(
                    source_name.replace(temp_prefix, ''),
                    str(cause).replace(temp_prefix, '').replace(final_prefix, ''))
            }
            for source_name, _target_name, cause in copy_error.args[0]
        ]
        return {"Exceptions when copying files": failures}
    except Exception as unexpected:
        # Remove full paths from filenames before sending error message to user
        cleaned = str(unexpected).replace(temp_prefix, '').replace(final_prefix, '')
        return {
            "Exceptions when copying files": [{
                "level": "error",
                "message": cleaned
            }]
        }
    finally:
        shutil.rmtree(submission_temp_path, ignore_errors=True)

    return None
def query_messages_for_data_review(data_review_record, messages):
    """
    Appends the messages of a data review to ``messages``, newest id first.

    Each appended entry is a dict with the message text ("message"), the
    email of the user who posted it ("user") and its creation date
    ("post_time"). The review's message list is sorted in place.

    :param data_review_record: data review object with a ``messages`` list.
    :param messages: list to append to.
    :return: the same ``messages`` list.
    """
    if not data_review_record.messages:
        return messages

    review_messages = data_review_record.messages
    review_messages.sort(key=lambda entry: entry.id, reverse=True)

    for entry in review_messages:
        author = get_user_from_id(entry.user)
        formatted = {"message": entry.message,
                     "user": author.email,
                     "post_time": entry.creation_date}
        messages.append(formatted)

    return messages
def assign_or_create_review_status(data_table_metadata, publication_recid, version):
    """
    If a review already exists, it will be attached to the current data record.
    If a review does not exist for a data table, it will be created.

    For every table, "review_flag" and "review_status" are set in its
    metadata; tables with an existing review also get a "messages" flag
    telling whether the review has any messages.

    :param data_table_metadata: dict mapping data table id to the metadata
        dict describing that table; updated in place.
    :param publication_recid: publication record id
    :param version: version of the submission the reviews belong to.
    :return: None
    """
    # A single all() is enough: it yields an empty list when there are no
    # reviews, so no separate COUNT query is needed.
    data_review_records = DataReview.query.filter_by(
        publication_recid=publication_recid, version=version).all()

    # this method should also create all the DataReviews for data_tables that
    # are not currently present to avoid
    # only creating data reviews when the review is clicked explicitly.
    assigned_tables = set()
    for data_review in data_review_records:
        if data_review.data_recid in data_table_metadata:
            table_metadata = data_table_metadata[data_review.data_recid]
            table_metadata["review_flag"] = data_review.status
            table_metadata["review_status"] = RECORD_PLAIN_TEXT[data_review.status]
            table_metadata["messages"] = len(data_review.messages) > 0
            assigned_tables.add(data_review.data_recid)

    # now create the missing data reviews
    for data_table_id in data_table_metadata:
        if data_table_id not in assigned_tables:
            data_record = create_data_review(
                data_table_id, publication_recid, version=version)
            data_table_metadata[data_table_id]["review_flag"] = data_record.status
            data_table_metadata[data_table_id]["review_status"] = \
                RECORD_PLAIN_TEXT[data_record.status]
def determine_user_privileges(recid, ctx):
    """
    Sets the flags in ``ctx`` that describe what the current user may do.

    The flags 'show_review_widget', 'show_upload_widget',
    'is_submission_coordinator_or_admin' and 'is_admin' all start as False.
    For an authenticated user they are raised according to the user's primary
    reviewer/uploader roles for the record, the admin role, and whether the
    user coordinates a submission of the record. Coordinators and admins
    always get the upload widget.

    :param recid: id of the publication record.
    :param ctx: context dict to update.
    :return: None
    """
    for flag in ('show_review_widget',
                 'show_upload_widget',
                 'is_submission_coordinator_or_admin',
                 'is_admin'):
        ctx[flag] = False

    if not current_user.is_authenticated:
        return

    user_id = current_user.get_id()

    for participant in get_submission_participants_for_record(recid, user_account=user_id):
        if participant is None or participant.status != 'primary':
            continue
        if participant.role == 'reviewer':
            ctx['show_review_widget'] = True
        if participant.role == 'uploader':
            ctx['show_upload_widget'] = True

    if has_role(User.query.get(user_id), 'admin'):
        ctx['is_submission_coordinator_or_admin'] = True
        ctx['is_admin'] = True
    else:
        coordinated_count = HEPSubmission.query.filter_by(
            publication_recid=recid,
            coordinator=user_id).count()
        if coordinated_count > 0:
            ctx['is_submission_coordinator_or_admin'] = True

    if ctx['is_submission_coordinator_or_admin']:
        # Coordinators and admins may always upload.
        ctx['show_upload_widget'] = ctx['show_upload_widget'] or True
def process_data_tables(ctx, data_record_query, first_data_id, data_table=None):
    """
    Collects display metadata for the data tables of a record.

    Also sets ``ctx['show_upload_area']``: True only when the upload widget
    is shown and the query yields no tables.

    :param ctx: context dict; must contain 'show_upload_widget'.
    :param data_record_query: query yielding the data tables of the record.
    :param first_data_id: id of the table to show first; -1 means "not chosen
        yet", in which case the first table found is used.
    :param data_table: optional table name; a table with this name becomes
        the table to show first.
    :return: tuple of (OrderedDict mapping table id to its metadata dict,
        id of the table to show first).
    """
    data_table_metadata = OrderedDict()
    ctx['show_upload_area'] = False

    if ctx['show_upload_widget'] and data_record_query.count() == 0:
        ctx['show_upload_area'] = True
        return data_table_metadata, first_data_id

    if data_record_query.count() > 0:
        for table_record in data_record_query.all():
            table_id = table_record.id
            # The processed name is the table name with all whitespace removed.
            name_without_whitespace = "".join(table_record.name.split())
            short_description = sanitize_html(
                truncate_string(table_record.description, 20),
                tags=[],
                strip=True
            )

            data_table_metadata[table_id] = {
                "id": table_id,
                "processed_name": name_without_whitespace,
                "name": table_record.name,
                "location": table_record.location_in_publication,
                "doi": table_record.doi,
                "description": short_description
            }

            if first_data_id == -1:
                first_data_id = table_id

            if data_table and table_record.name == data_table:
                first_data_id = table_id

    return data_table_metadata, first_data_id
def truncate_author_list(record, length=10):
    """
    Cuts the record's author list down to its first ``length`` entries.

    :param record: dict with an 'authors' list; the entry is replaced by a
        new, shortened list.
    :param length: maximum number of authors to keep.
    :return: None
    """
    all_authors = record['authors']
    record['authors'] = all_authors[:length]
def get_all_ids(index=None, id_field='recid', last_updated=None, latest_first=False):
    """Get all record or inspire ids of finished publications

    :param index: name of index to use (accepted but not used by this function).
    :param id_field: id type to return. Should be 'recid' or 'inspire_id'
    :param last_updated: if given, only include submissions last updated at
        or after this value.
    :param latest_first: if True, order the ids by last update, most recent
        first; otherwise order them by publication record id.
    :return: list of integer ids, without duplicates
    :raises ValueError: if id_field is not 'recid' or 'inspire_id'
    """
    if id_field not in ('recid', 'inspire_id'):
        raise ValueError('Invalid ID field %s' % id_field)

    db_col = HEPSubmission.publication_recid if id_field == 'recid' \
        else HEPSubmission.inspire_id

    # Get unique version
    query = db.session.query(db_col) \
        .filter(HEPSubmission.overall_status == 'finished')

    if last_updated:
        query = query.filter(HEPSubmission.last_updated >= last_updated)

    if latest_first:
        # Deduplicate in Python while keeping the order, as sorting by
        # last_updated means distinct doesn't work (as it looks for
        # distinct across both cols)
        query = query.order_by(HEPSubmission.last_updated.desc())
    else:
        query = query.order_by(HEPSubmission.publication_recid).distinct()

    # Rows without a value for the requested id cannot be converted to an
    # integer, so they are skipped rather than crashing on int(None).
    # dict.fromkeys drops duplicates while preserving the query order.
    raw_ids = [row[0] for row in query.all() if row[0] is not None]
    return [int(raw_id) for raw_id in dict.fromkeys(raw_ids)]