diff --git a/.gitignore b/.gitignore index e3dd55b1bc1c42453d30930ee2c032c5fab8d72b..b5493e310105c775412c6d14df92c683a1e506ef 100644 --- a/.gitignore +++ b/.gitignore @@ -84,3 +84,4 @@ tasks.py # vim related .vim/ Session.vim +Local-Deployment/.env diff --git a/Exareme-Docker/Dockerfile b/Exareme-Docker/Dockerfile index 12aeba04366cb0b16ba911a1086e11c1f0b82bce..208555ee825f30189675b8a8903ac68715f4817c 100644 --- a/Exareme-Docker/Dockerfile +++ b/Exareme-Docker/Dockerfile @@ -1,122 +1,70 @@ -FROM alpine:3.6 -MAINTAINER Sofia Karvounari <karvoun@di.uoa.gr> - -# Here we install GNU libc (aka glibc) and set C.UTF-8 locale as default. -RUN ALPINE_GLIBC_BASE_URL="https://github.com/sgerrand/alpine-pkg-glibc/releases/download" && \ - ALPINE_GLIBC_PACKAGE_VERSION="2.23-r2" && \ - ALPINE_GLIBC_BASE_PACKAGE_FILENAME="glibc-$ALPINE_GLIBC_PACKAGE_VERSION.apk" && \ - ALPINE_GLIBC_BIN_PACKAGE_FILENAME="glibc-bin-$ALPINE_GLIBC_PACKAGE_VERSION.apk" && \ - ALPINE_GLIBC_I18N_PACKAGE_FILENAME="glibc-i18n-$ALPINE_GLIBC_PACKAGE_VERSION.apk" && \ - apk add --no-cache --virtual=build-dependencies wget ca-certificates && \ - wget \ - "$ALPINE_GLIBC_BASE_URL/$ALPINE_GLIBC_PACKAGE_VERSION/$ALPINE_GLIBC_BASE_PACKAGE_FILENAME" \ - "$ALPINE_GLIBC_BASE_URL/$ALPINE_GLIBC_PACKAGE_VERSION/$ALPINE_GLIBC_BIN_PACKAGE_FILENAME" \ - "$ALPINE_GLIBC_BASE_URL/$ALPINE_GLIBC_PACKAGE_VERSION/$ALPINE_GLIBC_I18N_PACKAGE_FILENAME" && \ - apk add --allow-untrusted --no-cache \ - "$ALPINE_GLIBC_BASE_PACKAGE_FILENAME" \ - "$ALPINE_GLIBC_BIN_PACKAGE_FILENAME" \ - "$ALPINE_GLIBC_I18N_PACKAGE_FILENAME" && \ - /usr/glibc-compat/bin/localedef --force --inputfile POSIX --charmap UTF-8 C.UTF-8 || true && \ - echo "export LANG=C.UTF-8" > /etc/profile.d/locale.sh && \ - apk del glibc-i18n && \ - apk del build-dependencies && \ - rm \ - "$ALPINE_GLIBC_BASE_PACKAGE_FILENAME" \ - "$ALPINE_GLIBC_BIN_PACKAGE_FILENAME" \ - "$ALPINE_GLIBC_I18N_PACKAGE_FILENAME" +FROM ubuntu:20.04 +MAINTAINER Thanasis Karampatsis <tkarabatsis@athenarc.gr> ENV LANG=C.UTF-8 -######################################################## -# Install Java (Shamelessly copy pasted from develar/java, -# https://github.com/develar/docker-java/blob/master/Dockerfile) -# -# LSC: Updated for new URLs schemes on the Oracle website. 
-ENV JAVA_VERSION_MAJOR=8 \ - JAVA_VERSION_MINOR=141 \ - JAVA_VERSION_BUILD=15 \ - JAVA_VERSION_HASH=336fa29ff2bb4ef291e347e091f7f4a7 \ - JAVA_PACKAGE=server-jre \ - JAVA_HOME=/jre \ - PATH=${PATH}:/jre/bin \ - LANG=C.UTF-8 - -# about nsswitch.conf - see https://registry.hub.docker.com/u/frolvlad/alpine-oraclejdk8/dockerfile/ - #/usr/glibc/usr/bin/ldconfig /lib /usr/glibc/usr/lib && \ - -RUN apk add --update curl ca-certificates && \ - cd /tmp && \ - echo 'hosts: files mdns4_minimal [NOTFOUND=return] dns mdns4' >> /etc/nsswitch.conf && \ - curl -jksSLH "Cookie: oraclelicense=accept-securebackup-cookie" \ - "http://download.oracle.com/otn-pub/java/jdk/${JAVA_VERSION_MAJOR}u${JAVA_VERSION_MINOR}-b${JAVA_VERSION_BUILD}/${JAVA_VERSION_HASH}/${JAVA_PACKAGE}-${JAVA_VERSION_MAJOR}u${JAVA_VERSION_MINOR}-linux-x64.tar.gz" \ - | gunzip -c - | tar -xf - && \ - apk del curl ca-certificates && \ - mv jdk1.${JAVA_VERSION_MAJOR}.0_${JAVA_VERSION_MINOR}/jre /jre && \ - rm /jre/bin/jjs && \ - rm /jre/bin/keytool && \ - rm /jre/bin/orbd && \ - rm /jre/bin/pack200 && \ - rm /jre/bin/policytool && \ - rm /jre/bin/rmid && \ - rm /jre/bin/rmiregistry && \ - rm /jre/bin/servertool && \ - rm /jre/bin/tnameserv && \ - rm /jre/bin/unpack200 && \ - rm /jre/lib/ext/nashorn.jar && \ - rm /jre/lib/jfr.jar && \ - rm -rf /jre/lib/jfr && \ - rm -rf /jre/lib/oblique-fonts && \ - rm -rf /tmp/* /var/cache/apk/* - -# Some extra python libraries for the mip-algorithms, which needs to be -# compiled by hand +# Setting up timezone +ENV TZ=Europe/Athens +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +RUN apt update + +# Installing python +RUN apt install -y --no-install-recommends python2 +RUN ln -s /usr/bin/python2 /usr/bin/python + +# Installing Exareme requirements +RUN apt install -y openjdk-8-jdk curl jq iputils-ping + +# Installing pip +RUN curl -O https://raw.githubusercontent.com/pypa/get-pip/master/get-pip.py +RUN python get-pip.py +RUN apt-get install -y python-dev \ + build-essential libssl-dev libffi-dev \ + libxml2-dev libxslt1-dev zlib1g-dev + ADD files/requirements.txt /root/requirements.txt -RUN apk add --update py-psycopg2 py-pip ca-certificates gcc musl-dev python-dev lapack-dev g++ gfortran RUN pip install -r /root/requirements.txt RUN pip install scipy==1.2.1 scikit-learn==0.20.3 RUN pip install pandas -RUN apk add --update freetype-dev libjpeg-turbo-dev libpng-dev RUN pip install lifelines RUN pip install liac-arff RUN pip install sqlalchemy RUN pip install pathlib RUN pip install tqdm RUN pip install colour +RUN pip install tornado +RUN pip install statsmodels==0.10.2 -RUN pip install tornado #tornado server is used in the Madis Server +# Installing R +RUN apt install -y software-properties-common +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 +RUN add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' +RUN apt update +RUN apt install -y r-base +RUN Rscript -e 'install.packages("randomForest", repos="https://cloud.r-project.org")' -RUN apk del py-pip ca-certificates gcc musl-dev python-dev lapack-dev gfortran -RUN rm -rf /tmp/* /var/cache/apk/* - -# Runtime dependencies for Exareme -RUN apk add --update rsync curl bash jq python py-requests lapack --no-cache procps && \ - rm -rf /tmp/* /var/cache/apk/* +RUN Rscript -e 'install.packages("caret")' +RUN Rscript -e 'install.packages("e1071")' +RUN pip install rpy2==2.8.6 +# Add Madis Server +ADD src/madisServer /root/madisServer # Add Exareme 
ADD src/exareme/exareme-distribution/target/exareme /root/exareme +ADD files/root /root +RUN chmod -R 755 /root/exareme/*.py /root/exareme/*.sh # Add the algorithms ADD src/mip-algorithms /root/mip-algorithms -# Add Madis Server -ADD src/madisServer /root/madisServer - -# Exareme configuration, ssh keys and so on -# This has to be done after copying in the algorithms and exareme, as some -# files are placed in folders created by those two steps. -ADD files/java.sh /etc/profile.d/java.sh -RUN chmod 755 /etc/profile.d/java.sh -ADD files/root /root -RUN chmod -R 755 /root/exareme/*.py /root/exareme/*.sh - EXPOSE 9090 EXPOSE 22 ENV USER=root -ENV PYTHONPATH "${PYTONPATH}:/root/mip-algorithms" +ENV PYTHONPATH "${PYTHONPATH}:/root/mip-algorithms" WORKDIR /root/exareme CMD ["/bin/bash","bootstrap.sh"] -# While debugging -#ENTRYPOINT /bin/sh + diff --git a/Exareme-Docker/files/root/exareme/bootstrap.sh b/Exareme-Docker/files/root/exareme/bootstrap.sh index ef074eb967fdbbe2f76527f7417b256c4670846c..46ed522bd43949d0038476675db0346557ea4025 100755 --- a/Exareme-Docker/files/root/exareme/bootstrap.sh +++ b/Exareme-Docker/files/root/exareme/bootstrap.sh @@ -4,321 +4,288 @@ export DOCKER_DATA_FOLDER="/root/exareme/data/" export DOCKER_METADATA_FOLDER="/root/exareme/data/" -export EXAREME_ACTIVE_WORKERS_PATH="active_workers" -export EXAREME_MASTER_PATH="master" -export DATA="data" - -if [[ -z ${CONSULURL} ]]; then echo "CONSULURL is unset. Check docker-compose file."; exit; fi -if [[ -z ${NODE_NAME} ]]; then echo "NODE_NAME is unset. Check docker-compose file.";exit; fi -if [[ -z ${FEDERATION_ROLE} ]]; then echo "FEDERATION_ROLE is unset. Check docker-compose file.";exit; fi -if [[ -z ${ENVIRONMENT_TYPE} ]]; then echo "ENVIRONMENT_TYPE is unset. Check docker-compose file.";exit; fi - -#Stop Exareme service -stop_exareme () { - if [[ -f /tmp/exareme/var/run/*.pid ]]; then - kill -9 $( cat /tmp/exareme/var/run/*.pid) - rm /tmp/exareme/var/run/*.pid - echo "Stopped." - exit 0 - else - echo "Already stopped, no action taken." - fi -} -#Clean ups in Consul [key-value store] -deleteKeysFromConsul () { - if [[ "$(curl -s -o /dev/null -i -w "%{http_code}\n" ${CONSULURL}/v1/kv/${DATA}/${NODE_NAME}?keys)" = "200" ]]; then - curl -s -X DELETE $CONSULURL/v1/kv/$DATASETS/$NODE_NAME - fi - if [[ "$(curl -s -o /dev/null -i -w "%{http_code}\n" ${CONSULURL}/v1/kv/${1}/${NODE_NAME}?keys)" = "200" ]]; then - curl -s -X DELETE $CONSULURL/v1/kv/$1/$NODE_NAME - fi +export CONSUL_DATA_PATH="data" +export CONSUL_MASTER_PATH="master" +export CONSUL_ACTIVE_WORKERS_PATH="active_workers" + +CONSUL_CONNECTION_MAX_ATTEMPTS=20 +CONSUL_WAIT_FOR_MASTER_IP_MAX_ATTEMPTS=20 +EXAREME_NODE_STARTUP_HEALTH_CHECK_MAX_ATTEMPTS=10 +EXAREME_NODE_HEALTH_CHECK_TIMEOUT=60 +MASTER_NODE_REACHABLE_TIMEOUT=5 +PERIODIC_EXAREME_NODES_HEALTH_CHECK_MAX_RETRIES=5 +PERIODIC_EXAREME_NODES_HEALTH_CHECK_INTERVAL=120 +EXAREME_HEALTH_CHECK_AWAIT_TIME=60 +PERIODIC_TEMP_FILES_REMOVAL=300 + +if [[ -z ${CONSULURL} ]]; then + echo "CONSULURL is unset. Check docker-compose file." + exit +fi +if [[ -z ${NODE_NAME} ]]; then + echo "NODE_NAME is unset. Check docker-compose file." + exit +fi +if [[ -z ${FEDERATION_ROLE} ]]; then + echo "FEDERATION_ROLE is unset. Check docker-compose file." + exit +fi +if [[ -z ${ENVIRONMENT_TYPE} ]]; then + echo "ENVIRONMENT_TYPE is unset. Check docker-compose file." + exit +fi +if [[ -z ${CONVERT_CSVS} ]]; then + echo "CONVERT_CSVS is unset. Check docker-compose file." 
+    exit
+fi
+
+timestamp() {
+    date +%F' '%T
 }
 
-#CSVs to DB
-transformCsvToDB () {
-    # Both Master and Worker should transform the csvs to sqlite db files
-    # Removing all previous .db files from the DOCKER_DATA_FOLDER
-    echo "Deleting previous db files. "
-    rm -rf ${DOCKER_DATA_FOLDER}/**/*.db
-
-    echo "Parsing the csv files in " ${DOCKER_DATA_FOLDER} " to db files. "
-    python ./convert-csv-dataset-to-db.py -f ${DOCKER_DATA_FOLDER} -t ${1}
-    #Get the status code from previous command
-    py_script=$?
-    #If status code != 0 an error has occurred
-    if [[ ${py_script} -ne 0 ]]; then
-        echo "Script: \"convert-csv-dataset-to-db.py\" exited with error." >&2
-        exit 1
-    fi
+# Wait until Consul [key-value store] is up and running
+waitForConsulToStart() {
+    attempts=0
+    while [[ "$(curl -s ${CONSULURL}/v1/health/state/passing | jq -r '.[].Status')" != "passing" ]]; do
+        echo -e "\n$(timestamp) Trying to connect with Consul [key-value store]... "
+
+        # Exit after CONSUL_CONNECTION_MAX_ATTEMPTS attempts
+        if [[ $attempts -ge $CONSUL_CONNECTION_MAX_ATTEMPTS ]]; then
+            echo -e "\n$(timestamp) Consul[key-value store] may not be initialized or Node with IP: ${NODE_IP} and name: ${NODE_NAME} cannot contact it."
+            exit 1
+        fi
+
+        attempts=$(($attempts + 1))
+        sleep 5
+    done
+    echo -e "\n$(timestamp) Node connected to Consul."
 }
 
-# Setup signal handlers
-trap term_handler SIGTERM SIGKILL
+# Get Master Node IP from Consul
+getMasterIPFromConsul() {
+    attempts=0
+    while [[ "$(curl -s -o /dev/null -i -w "%{http_code}\n" ${CONSULURL}/v1/kv/${CONSUL_MASTER_PATH}/?keys)" != "200" ]]; do
+        echo -e "$(timestamp) Retrieving Master's info from Consul[key-value store]..."
 
-#This funciton will be executed when the container receives the SIGTERM signal (when stopping)
-term_handler () {
+        if [[ $attempts -ge $CONSUL_WAIT_FOR_MASTER_IP_MAX_ATTEMPTS ]]; then
+            echo "$(timestamp) Is Master node initialized? Check Master's logs. Terminating Worker node with IP: ${NODE_IP} and nodeName: ${NODE_NAME}."
+            return 1
+        fi
 
-if [[ "${FEDERATION_ROLE}" != "master" ]]; then	#worker
-    echo "*******************************Stopping Worker**************************************"
-    if [[ "$(curl -s ${CONSULURL}/v1/health/state/passing | jq -r '.[].Status')" = "passing" ]]; then
-        deleteKeysFromConsul "$EXAREME_ACTIVE_WORKERS_PATH"
-    fi
-    if [[ "$(curl -s -o /dev/null -i -w "%{http_code}\n" ${CONSULURL}/v1/kv/${EXAREME_MASTER_PATH}/?keys)" = "200" ]]; then
-        MY_IP=$(/sbin/ifconfig eth0 | grep "inet" | awk -F: '{print $2}' | cut -d ' ' -f 1)
-        MASTER_IP=$(curl -s ${CONSULURL}/v1/kv/${EXAREME_MASTER_PATH}/$(curl -s ${CONSULURL}/v1/kv/${EXAREME_MASTER_PATH}/?keys | jq -r '.[]' | sed "s/${EXAREME_MASTER_PATH}\///g")?raw)
-        #Delete worker from master's registry
-        curl -s ${MASTER_IP}:9091/remove/worker?IP=${MY_IP}	#TODO check if that was done?
-    fi
-    stop_exareme
-else	#master
-    echo "*******************************Stopping Master**************************************"
-    if [[ "$(curl -s ${CONSULURL}/v1/health/state/passing | jq -r '.[].Status')" = "passing" ]]; then
-        deleteKeysFromConsul "$EXAREME_MASTER_PATH"
-    fi
-    stop_exareme
-fi
-exit 0
+        attempts=$(($attempts + 1))
+        sleep 5
+    done
+
+    MASTER_IP=$(curl -s ${CONSULURL}/v1/kv/${CONSUL_MASTER_PATH}/$(curl -s ${CONSULURL}/v1/kv/${CONSUL_MASTER_PATH}/?keys | jq -r '.[]' | sed "s/${CONSUL_MASTER_PATH}\///g")?raw)
+
+    echo -e "\n$(timestamp) Fetched master node's IP ${MASTER_IP}"
+    return 0
 }
 
-mkdir -p /tmp/demo/db/
+# Convert CSVs to DB
+convertCSVsToDB() {
+
+    # Skip conversion if the flag is false
+    if [[ ${CONVERT_CSVS} == "FALSE" ]]; then
+        echo "$(timestamp) CSV conversion turned off. "
+        return 0
+    fi
+
+    # Removing all previous .db files from the DOCKER_DATA_FOLDER
+    echo "$(timestamp) Deleting previous db files. "
+    rm -rf ${DOCKER_DATA_FOLDER}/**/*.db
+
+    echo "$(timestamp) Parsing the csv files in " ${DOCKER_DATA_FOLDER} " to db files. "
+    python3 ./convert-csv-dataset-to-db.py -f ${DOCKER_DATA_FOLDER}
+    #Get the status code from previous command
+    py_script=$?
+    #If status code != 0 an error has occurred
+    if [[ ${py_script} -ne 0 ]]; then
+        echo "$(timestamp) Script: \"convert-csv-dataset-to-db.py\" exited with error." >&2
+        exit 1
+    fi
+}
 
-echo "Strarting Madis Server..."
-python /root/madisServer/MadisServer.py &
-echo "Madis Server started"
-
-#This is the Worker
-if [[ "${FEDERATION_ROLE}" != "master" ]]; then
-
-    DESC="exareme-worker"
-    MY_IP=$(/sbin/ifconfig eth0 | grep "inet" | awk -F: '{print $2}' | cut -d ' ' -f 1)
-
-    #Try accessing Consul[key-value store]
-    echo -e "\nWorker node["${NODE_NAME}","${MY_IP}"] trying to connect with Consul[key-value store]"
-    n=0
-    #Wait until Consul [key-value store] is up and running
-    while [[ "$(curl -s ${CONSULURL}/v1/health/state/passing | jq -r '.[].Status')" != "passing" ]]; do
-        echo -e "\nWorker node["${NODE_NAME}","${MY_IP}"] trying to connect with Consul[key-value store]"
-        sleep 2
-        n=$((${n} + 1))
-        #After 4 attempts-Show error
-        if [[ ${n} -ge 5 ]]; then
-            echo -e "\nConsul[key-value store] may not be initialized or Worker node["${NODE_NAME}","${MY_IP}"] can not contact Consul[key-value store]"
-            exit 1 #Simple exit 1. Exareme is not up yet
-        fi
-    done
-
-    #Try retrieve Master's IP from Consul[key-value store]
-    echo -e "Retrieving Master's info from Consul[key-value store]"
-    n=0
-    while [[ "$(curl -s -o /dev/null -i -w "%{http_code}\n" ${CONSULURL}/v1/kv/${EXAREME_MASTER_PATH}/?keys)" != "200" ]]; do
-        sleep 5
-        n=$((${n} + 1))
-        echo -e "Retrieving Master's info from Consul[key-value store]"
-        if [[ ${n} -ge 30 ]]; then
-            echo "Is Master node initialized? Check Master's logs. Terminating Worker node["${NODE_NAME}","${MY_IP}"]"
-            exit 1
-        fi
-    done
-
-    #Get Master's IP/Name from Consul[key-value store]
-    MASTER_IP=$(curl -s ${CONSULURL}/v1/kv/${EXAREME_MASTER_PATH}/$(curl -s ${CONSULURL}/v1/kv/${EXAREME_MASTER_PATH}/?keys | jq -r '.[]' | sed "s/${EXAREME_MASTER_PATH}\///g")?raw)
-    MASTER_NAME=$(curl -s ${CONSULURL}/v1/kv/${EXAREME_MASTER_PATH}/?keys | jq -r '.[]' | sed "s/${EXAREME_MASTER_PATH}\///g")
-
-    #CSVs to DB
-    transformCsvToDB "worker"
-
-    . ./start-worker.sh
-    echo "Worker node["${MY_IP}","${NODE_NAME}]" trying to connect with Master node["${MASTER_IP}","${MASTER_NAME}"]"
-    while [[ !
-f /var/log/exareme.log ]]; do - echo "Worker node["${MY_IP}","${NODE_NAME}]" trying to connect with Master node["${MASTER_IP}","${MASTER_NAME}"]" - sleep 1 - done - echo "Waiting to establish connection for Worker node["${MY_IP}","${NODE_NAME}"] with Master node["${MASTER_IP}","${MASTER_NAME}"]" - tail -f /var/log/exareme.log | while read LOGLINE - do - [[ "${LOGLINE}" == *"Worker node started."* ]] && pkill -P $$ tail - echo "Waiting to establish connection for Worker node["${MY_IP}","${NODE_NAME}"] with Master node["${MASTER_IP}","${MASTER_NAME}"]" - sleep 1 - - #Java's exception in StartWorker.java - if [[ "${LOGLINE}" == *"java.rmi.RemoteException"* ]]; then - exit 1 #Simple exit 1. Exareme is not up yet - fi - done - - if [[ ${ENVIRONMENT_TYPE} == "DEV" ]] || [[ ${ENVIRONMENT_TYPE} == "TEST" ]]; then - echo "Running set-local-datasets." - ./set-local-datasets.sh - - echo -e "\nDEV version: Worker node["${MY_IP}","${NODE_NAME}"] may be connected to Master node["${MASTER_IP}","${MASTER_NAME}"]" - curl -s -X PUT -d @- ${CONSULURL}/v1/kv/${EXAREME_ACTIVE_WORKERS_PATH}/${NODE_NAME} <<< ${MY_IP} - elif [[ ${ENVIRONMENT_TYPE} == "PROD" ]]; then - #Health check for Worker. HEALTH_CHECK algorithm execution - echo "Health check for Worker node["${MY_IP}","${NODE_NAME}"]" - - check="$(curl -s ${MASTER_IP}:9092/check/worker?IP_MASTER=${MASTER_IP}?IP_WORKER=${MY_IP})" - - if [[ -z ${check} ]]; then - #If curl returned nothing, something is wrong. We can not know what is wrong though.. - printf "Health_Check algorithm did not return anything...Switch ENVIRONMENT_TYPE to 'DEV' to see Error messages coming\ -from EXAREME..Exiting" - exit 1 - else - #check if what curl returned is JSON - echo ${check} | jq empty - #if NOT JSON an error code will be returned (!=0) - check_code=$? - if [[ ${check_code} -ne 0 ]]; then - echo "An error has occurred: " ${check} ".....Exiting" - exit 1 - else - getNames="$( echo ${check} | jq '.active_nodes')" - - #Retrieve result as json. If $NODE_NAME exists in result, the algorithm run in the specific node - if [[ ${getNames} = *${NODE_NAME}* ]]; then - echo -e "\nWorker node["${MY_IP}","${NODE_NAME}"] connected to Master node["${MASTER_IP}","${MASTER_NAME}"]" - curl -s -X PUT -d @- ${CONSULURL}/v1/kv/${EXAREME_ACTIVE_WORKERS_PATH}/${NODE_NAME} <<< ${MY_IP} - - echo "Running set-local-datasets." - ./set-local-datasets.sh - - else - echo ${check} - echo "Worker node["${MY_IP}","${NODE_NAME}]" seems that is not connected with the Master..\ -Switch ENVIRONMENT_TYPE to 'DEV' to see Error messages coming from EXAREME..Exiting..." - exit 1 - fi - fi - fi +# Health check for exareme nodes. +# Health check from MASTER checks all nodes. +# Health check from WORKERS checks only that specific node. +exaremeNodesHealthCheck() { + if [[ ${ENVIRONMENT_TYPE} != "PROD" ]]; then + return 0 + fi + + echo "$(timestamp) HEALTH CHECK for node with IP ${NODE_IP} and name ${NODE_NAME} ." + + if [[ "${FEDERATION_ROLE}" == "master" ]]; then + check=$(curl -s -X POST --max-time ${EXAREME_NODE_HEALTH_CHECK_TIMEOUT} ${NODE_IP}:9090/mining/query/HEALTH_CHECK) + else + check=$(curl -s --max-time ${EXAREME_NODE_HEALTH_CHECK_TIMEOUT} "${MASTER_IP}:9092/check/worker?NODE_IP=${NODE_IP}&NODE_NAME=${NODE_NAME}") + fi + + if [[ -z ${check} ]]; then + return 1 + fi + + # Check if what curl returned is JSON + echo ${check} | jq empty + check_code=$? + if [[ ${check_code} -ne 0 ]]; then + return 1 + fi + + # Retrieve result as json. 
If $NODE_NAME exists in result, the algorithm ran on that specific node
+    getNames="$(echo ${check} | jq '.active_nodes')"
+    if ! [[ ${getNames} == *${NODE_NAME}* ]]; then
+        return 1
+    fi
+
+    return 0
+}
+
+# Exareme health check on startup
+startupExaremeNodesHealthCheck() {
+    # If the health check fails, try again until it succeeds or close the container.
+    attempts=0
+    while ! exaremeNodesHealthCheck; do
+        if [[ $attempts -ge $EXAREME_NODE_STARTUP_HEALTH_CHECK_MAX_ATTEMPTS ]]; then
+            echo -e "\n$(timestamp) HEALTH CHECK FAILED. Closing the container."
+            return 1 # Exiting
         fi
+        echo "$(timestamp) HEALTH CHECK failed. Trying again..."
+        attempts=$(($attempts + 1))
+        sleep $EXAREME_HEALTH_CHECK_AWAIT_TIME
+    done
+    echo "$(timestamp) HEALTH CHECK successful on NODE_IP: $NODE_IP"
+    return 0
+}
 
-#This is the Master
-else
-    DESC="exareme-master"
-    MY_IP=$(/sbin/ifconfig eth0 | grep "inet" | awk -F: '{print $2}' | cut -d ' ' -f 1)
-
-    echo -e "\nMaster node["${NODE_NAME}","${MY_IP}"] trying to connect with Consul[key-value store]"
-
-    #Try accessing Consul[key-value store]
-    echo -e "\nMaster node["${NODE_NAME}","${MY_IP}"] trying to connect with Consul[key-value store]"
-    n=0
-    #Wait until Consul [key-value store] is up and running
-    while [[ "$(curl -s ${CONSULURL}/v1/health/state/passing | jq -r '.[].Status')" != "passing" ]]; do
-        echo -e "\nMaster node["${NODE_NAME}","${MY_IP}"] trying to connect with Consul[key-value store]"
-        n=$((${n} + 1))
-        sleep 1
-        #After 30 attempts-Show error
-        if [[ ${n} -ge 30 ]]; then
-            echo -e "\nConsul[key-value store] may not be initialized or Master node["${NODE_NAME}","${MY_IP}"] can not contact Consul[key-value store]"
-            exit 1 #Simple exit 1. Exareme is not up yet
-        fi
-    done
-
-    #CSVs to DB
-    transformCsvToDB "master"
-
-    #Master re-booted
-    if [[ "$(curl -s -o /dev/null -i -w "%{http_code}\n" ${CONSULURL}/v1/kv/${EXAREME_MASTER_PATH}/?keys)" = "200" ]]; then
-        #Workers connected to Master node
-        if [[ "$(curl -s -o /dev/null -i -w "%{http_code}\n" ${CONSULURL}/v1/kv/${EXAREME_ACTIVE_WORKERS_PATH}/?keys)" = "200" ]]; then
-            :
-            #TODO check what if master restarts with different IP while workers are already connected to the master's registry with previous IP
-        else
-            ./exareme-admin.sh --start
-            echo "Master node["${MY_IP}","$NODE_NAME"] trying to re-boot..."
-            while [[ ! -f /var/log/exareme.log ]]; do
-                echo "Master node["$MY_IP"," $NODE_NAME"] re-booted..."
-            done
-        fi
-
-    #Master node just created
-    else
-        ./exareme-admin.sh --start
-        echo "Initializing Master node["${MY_IP}","${NODE_NAME}"]"
-
-        while [[ ! -f /var/log/exareme.log ]]; do
-            echo "Initializing Master node["${MY_IP}","${NODE_NAME}"]"
-        done
-        echo "Initializing Master node["${MY_IP}","${NODE_NAME}"]"
-        tail -f /var/log/exareme.log | while read LOGLINE
-        do
-            [[ "${LOGLINE}" == *"Master node started."* ]] && pkill -P $$ tail
-            echo "Initializing Master node["${MY_IP}","${NODE_NAME}"]"
-
-            #Java's exception in StartMaster.java
-            if [[ "${LOGLINE}" == *"java.rmi.RemoteException"* ]]; then
-                exit 1 #Simple exit 1. Exareme is not up yet
-            fi
-        done
+# Periodic check of Exareme's health.
+# If it fails, shut down the container.
+periodicExaremeNodesHealthCheck() {
+    # If Consul doesn't have the master node's IP, it means that it restarted. The nodes should restart.
+    if ! getMasterIPFromConsul; then
+        pkill -f 1 # Closing main bootstrap.sh process to stop the container.
+    fi
+
+    # Make a health check every $PERIODIC_EXAREME_NODES_HEALTH_CHECK_INTERVAL seconds.
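+    # A failed check is retried up to PERIODIC_EXAREME_NODES_HEALTH_CHECK_MAX_RETRIES times,
+    # EXAREME_HEALTH_CHECK_AWAIT_TIME seconds apart, before the main bootstrap.sh process (PID 1)
+    # is killed so that Docker stops (and can restart) the container.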
+ while true; do + sleep $PERIODIC_EXAREME_NODES_HEALTH_CHECK_INTERVAL + + # If health check fails then try again until it succeeds or close the container. + attempts=0 + while ! exaremeNodesHealthCheck; do + if [[ $attempts -ge $PERIODIC_EXAREME_NODES_HEALTH_CHECK_MAX_RETRIES ]]; then + echo -e "\n$(timestamp) HEALTH CHECK FAILED. Closing the container." + pkill -f 1 # Closing main bootstrap.sh process to stop the container. + fi + echo "$(timestamp) HEALTH CHECK failed. Trying again..." + attempts=$(($attempts + 1)) + sleep $EXAREME_HEALTH_CHECK_AWAIT_TIME + done + echo "$(timestamp) HEALTH CHECK successful on NODE_IP: $NODE_IP" + done +} + +# Periodic check that the master node is reachable +# If it fails shutdown the container +periodicReachableMasterNodeCheck() { + # If consul doesn't have master node's IP it means that it restarted. The nodes should restart. + if ! getMasterIPFromConsul; then + pkill -f 1 # Closing main bootstrap.sh process to stop the container. + fi + + # Check that master is reachable every 5 seconds. + while true; do + sleep 5 - if [[ ${ENVIRONMENT_TYPE} == "DEV" ]] || [[ ${ENVIRONMENT_TYPE} == "TEST" ]] ; then - echo "Running set-local-datasets." - ./set-local-datasets.sh - - echo -e "\nDEV version: Master node["${MY_IP}","${NODE_NAME}"] may be initialized" - curl -s -X PUT -d @- ${CONSULURL}/v1/kv/${EXAREME_MASTER_PATH}/${NODE_NAME} <<< ${MY_IP} - elif [[ ${ENVIRONMENT_TYPE} == "PROD" ]]; then - #Health check for Master. HEALTH_CHECK algorithm execution - echo "Health check for Master node["${MY_IP}","${NODE_NAME}"]" - - check=$(curl -s ${MY_IP}:9092/check/worker?IP_MASTER=${MY_IP}?IP_WORKER=${MY_IP}) #Master has a Worker instance. So in this case IP_MASTER / IP_WORKER is the same - - if [[ -z ${check} ]]; then - #if curl returned nothing, something is wrong. We can not know what is wrong though - printf "Health_Check algorithm did not return anything...Switch ENVIRONMENT_TYPE to 'DEV' to see Error messages coming\ -from EXAREME..Exiting" - exit 1 - else - #check if what curl returned is JSON - echo ${check} | jq empty - #if NOT JSON an error code will be returned (!=0) - check_code=$? - if [[ ${check_code} -ne 0 ]]; then - echo "An error has occurred: " ${check} ".....Exiting" - exit 1 - else - getNames="$( echo ${check} | jq '.active_nodes')" - - #Retrieve result as json. If $NODE_NAME exists in result, the algorithm run in the specific node - if [[ ${getNames} = *${NODE_NAME}* ]]; then - echo -e "\nMaster node["${MY_IP}","${NODE_NAME}"] initialized" - curl -s -X PUT -d @- ${CONSULURL}/v1/kv/${EXAREME_MASTER_PATH}/${NODE_NAME} <<< ${MY_IP} - - echo "Running set-local-datasets." - ./set-local-datasets.sh - - else - echo ${check} - echo "Master node["${MY_IP}","${NODE_NAME}]" seems that could not be initialized..\ -Switch ENVIRONMENT_TYPE to 'DEV' to see Error messages coming from EXAREME..Exiting..." - exit 1 - fi - fi - fi - fi + if ! ping -c 5 -W 2 $MASTER_IP &>/dev/null ; then + echo -e "\n$(timestamp) MASTER NODE IS UNREACHABLE. Closing the container." + pkill -f 1 # Closing main bootstrap.sh process to stop the container. fi -fi + + #echo "$(timestamp) HEALTH CHECK successful from NODE_IP: $NODE_IP, MASTER NODE IS REACHABLE" + done +} + +# Periodic deletion of temp files +startTempFilesDeletionTask() { + while true; do + sleep $PERIODIC_TEMP_FILES_REMOVAL + if [ $FEDERATION_ROLE = "master" ]; then + cd /tmp/demo/db/; + find . -type d -path "./*" -mmin +$TEMP_FILES_CLEANUP_TIME -exec rm -rf {} +; + cd /tmp/demo/algorithms-generation/; + find . 
-type d -path "./*" -mmin +$TEMP_FILES_CLEANUP_TIME -exec rm -rf {} +; + else + cd /tmp/demo/db/; + find . -type d -path "./*" -mmin +$TEMP_FILES_CLEANUP_TIME -exec rm -rf {} +; + find . -type f -path "./*" -mmin +$TEMP_FILES_CLEANUP_TIME -delete; + fi + done +} + +mkdir -p /tmp/demo/db/ + +# Getting the IP and removing white spaces +NODE_IP=$(hostname -i | sed 's/ *$//g') + +# Start Exareme and MadisServer +echo "Starting Madis Server..." +python /root/madisServer/MadisServer.py & +echo "Madis Server started." + +waitForConsulToStart -echo '*/15 * * * * ./set-local-datasets.sh' >> /etc/crontabs/root +# Prepare datasets from CSVs to SQLite db files +convertCSVsToDB + +# Updating consul with node's datasets. +echo "$(timestamp) Updating consul with node's datasets." +./set-local-datasets.sh + +# Running bootstrap on a master node +if [[ "${FEDERATION_ROLE}" == "master" ]]; then + + echo "$(timestamp) Starting Exareme on master node with IP: ${NODE_IP} and nodeName: ${NODE_NAME}" + ./exareme-admin.sh --start + + # Updating consul with node IP + echo -e "\n$(timestamp) Updating consul with master node IP." + curl -s -X PUT -d @- ${CONSULURL}/v1/kv/${CONSUL_MASTER_PATH}/${NODE_NAME} <<<${NODE_IP} + curl -s -X PUT -d @- ${CONSULURL}/v1/kv/${CONSUL_ACTIVE_WORKERS_PATH}/${NODE_NAME} <<<${NODE_IP} + + periodicExaremeNodesHealthCheck & + +else ##### Running bootstrap on a worker node ##### + + if ! getMasterIPFromConsul; then + echo "$(timestamp) Could not fetch master node's IP. Exiting..." + exit 1 + fi + + echo "$(timestamp) Starting Exareme on worker node with IP: ${NODE_IP} and nodeName: ${NODE_NAME}" + . ./start-worker.sh + + if ! startupExaremeNodesHealthCheck; then + echo "$(timestamp) HEALTH CHECK algorithm failed. Switch ENVIRONMENT_TYPE to 'DEV' to see error messages coming from EXAREME. Exiting..." + exit 1 + fi + + # Updating consul with node IP + echo -e "\n$(timestamp) Updating consul with worker node IP." + curl -s -X PUT -d @- ${CONSULURL}/v1/kv/${CONSUL_ACTIVE_WORKERS_PATH}/${NODE_NAME} <<<${NODE_IP} + + periodicExaremeNodesHealthCheck & + + periodicReachableMasterNodeCheck & + +fi -echo '0 * * * * if [ $FEDERATION_ROLE = "master" ]; then \ -cd /tmp/demo/db/ \ -&& find . -type d -path "./*" -mmin +$TEMP_FILES_CLEANUP_TIME -exec rm -rf {} +\ -&& cd /tmp/demo/algorithms-generation/ \ -&& find . -type d -path "./*" -mmin +$TEMP_FILES_CLEANUP_TIME -exec rm -rf {} +;\ -else \ -cd /tmp/demo/db/ \ -&& find . -type d -path "./*" -mmin +$TEMP_FILES_CLEANUP_TIME -exec rm -rf {} +\ -&& find . -type f -path "./*" -mmin +$TEMP_FILES_CLEANUP_TIME -delete; \ -fi' >> /etc/crontabs/root -crond +startTempFilesDeletionTask & # Creating the python log file -echo "Exareme Python Algorithms log file created." > /var/log/exaremePythonAlgorithms.log +echo "$(timestamp) Exareme Python Algorithms log file created." >/var/log/exaremePythonAlgorithms.log -# Running something in foreground, otherwise the container will stop -while true -do - tail -fn +1 /var/log/exareme.log -fn +1 /var/log/exaremePythonAlgorithms.log -fn +1 /var/log/MadisServer.log -done +# Printing logs of Exareme, madis server and python algorithms. 
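+# tail -f never exits, so it also keeps a foreground process running; without it the container would stop.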
+tail -fn +1 /var/log/exareme.log -fn +1 /var/log/exaremePythonAlgorithms.log -fn +1 /var/log/MadisServer.log diff --git a/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py b/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py index 20d6b564e82b8b99b998db664d19b2eb2ae49a05..4ad34144061d9de2cdc941aca2dd9fd91b8d443d 100755 --- a/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py +++ b/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py @@ -1,28 +1,30 @@ -#!/usr/bin/python +#!/usr/bin/python3 # -*- coding: utf-8 -*- """ This script creates multiple dbs for each pathology folder containing a dataset csv file and a metadata json file. """ -import os -import sys import csv -import sqlite3 import json +import os +import sqlite3 from argparse import ArgumentParser +MAX_ROWS_TO_INSERT_INTO_SQL = 100 + + # This metadata dictionary contains only code and sqltype so that processing will be faster # It also includes the subjectcode def createMetadataDictionary(CDEsMetadataPath): - CDEsMetadata = open(CDEsMetadataPath) + CDEsMetadata = open(CDEsMetadataPath, "r", encoding="utf-8") metadataJSON = json.load(CDEsMetadata) metadataDictionary = {} metadataDictionary['subjectcode'] = 'text' metadataDictionary['dataset'] = 'text' metadataDictionary = addGroupVariablesToDictionary(metadataJSON, - metadataDictionary) + metadataDictionary) return metadataDictionary @@ -30,18 +32,19 @@ def addGroupVariablesToDictionary(groupMetadata, metadataDictionary): if 'variables' in groupMetadata: for variable in groupMetadata['variables']: if 'sql_type' not in variable: - raise ValueError('The variable "' + variable['code'] + '" does not contain the sql_type field in the metadata.') + raise ValueError( + 'The variable "' + variable['code'] + '" does not contain the sql_type field in the metadata.') metadataDictionary[variable['code']] = variable['sql_type'] if 'groups' in groupMetadata: for group in groupMetadata['groups']: metadataDictionary = addGroupVariablesToDictionary(group, - metadataDictionary) + metadataDictionary) return metadataDictionary # This metadata list is used to create the metadata table. It contains all the known information for each variable. 
def createMetadataList(CDEsMetadataPath): - CDEsMetadata = open(CDEsMetadataPath) + CDEsMetadata = open(CDEsMetadataPath, "r", encoding="utf-8") metadataJSON = json.load(CDEsMetadata) metadataList = [] @@ -54,46 +57,50 @@ def addGroupVariablesToList(groupMetadata, metadataList): for variable in groupMetadata['variables']: variableDictionary = {} variableDictionary['code'] = variable['code'] - + if 'label' not in variable: - raise ValueError('The variable "' + variable['code'] + '" does not contain the label field in the metadata.') + raise ValueError( + 'The variable "' + variable['code'] + '" does not contain the label field in the metadata.') variableDictionary['label'] = variable['label'] - + if 'sql_type' not in variable: - raise ValueError('The variable "' + variable['code'] + '" does not contain the sql_type field in the metadata.') + raise ValueError( + 'The variable "' + variable['code'] + '" does not contain the sql_type field in the metadata.') variableDictionary['sql_type'] = variable['sql_type'] - + if 'isCategorical' not in variable: - raise ValueError('The variable "' + variable['code'] + '" does not contain the isCategorical field in the metadata.') + raise ValueError( + 'The variable "' + variable['code'] + '" does not contain the isCategorical field in the metadata.') variableDictionary['isCategorical'] = '1' if variable['isCategorical'] else '0' - + if variable['isCategorical'] and 'enumerations' not in variable: - raise ValueError('The variable "' + variable['code'] + '" does not contain enumerations even though it is categorical.') - - if 'enumerations' in variable: + raise ValueError('The variable "' + variable[ + 'code'] + '" does not contain enumerations even though it is categorical.') + + if 'enumerations' in variable: enumerations = [] for enumeration in variable['enumerations']: - enumerations.append(unicode(enumeration['code'])) + enumerations.append(str(enumeration['code'])) variableDictionary['enumerations'] = ','.join(enumerations) else: variableDictionary['enumerations'] = None - + if 'min' in variable: variableDictionary['min'] = variable['min'] else: variableDictionary['min'] = None - + if 'max' in variable: variableDictionary['max'] = variable['max'] else: variableDictionary['max'] = None - + metadataList.append(variableDictionary) - + if 'groups' in groupMetadata: for group in groupMetadata['groups']: metadataList = addGroupVariablesToList(group, - metadataList) + metadataList) return metadataList @@ -123,40 +130,43 @@ def addMetadataInTheDatabase(CDEsMetadataPath, cur): insertVariableQuery += "'" + variable['code'] + "'" insertVariableQuery += ", '" + variable['label'] + "'" insertVariableQuery += ", '" + variable['sql_type'] + "'" - insertVariableQuery += ", " + variable['isCategorical'] - if variable['enumerations'] : + insertVariableQuery += ", " + variable['isCategorical'] + if variable['enumerations']: insertVariableQuery += ", '" + variable['enumerations'] + "'" else: insertVariableQuery += ", NULL" - - if variable['min'] : + + if variable['min']: insertVariableQuery += ", '" + variable['min'] + "'" else: insertVariableQuery += ", NULL" - - if variable['max'] : + + if variable['max']: insertVariableQuery += ", '" + variable['max'] + "'" else: - insertVariableQuery += ", NULL" - + insertVariableQuery += ", NULL" + insertVariableQuery += ");" - + try: cur.execute(insertVariableQuery) except sqlite3.IntegrityError: - raise ValueError ('Failed to execute query: ' + insertVariableQuery + ' , due to database constraints.') - + raise ValueError('Failed 
to execute query: ' + insertVariableQuery + ' , due to database constraints.') + def createDataTable(metadataDictionary, cur): # Create the query for the sqlite data table createDataTableQuery = 'CREATE TABLE DATA(' for column in metadataDictionary: - if metadataDictionary[column] in ['INT','int','Int']: - createDataTableQuery += column + ' ' + metadataDictionary[column] + ' CHECK (TYPEOF(' + column + ') = "integer" OR TYPEOF(' + column + ') = "null"), ' - elif metadataDictionary[column] in ['REAL','real','Real']: - createDataTableQuery += column + ' ' + metadataDictionary[column] + ' CHECK (TYPEOF(' + column + ') = "real" OR TYPEOF(' + column + ') = "integer" OR TYPEOF(' + column + ') = "null"), ' - elif metadataDictionary[column] in ['TEXT','text','Text']: - createDataTableQuery += column + ' ' + metadataDictionary[column] + ' CHECK (TYPEOF(' + column + ') = "text" OR TYPEOF(' + column + ') = "null"), ' + if metadataDictionary[column] in ['INT', 'int', 'Int']: + createDataTableQuery += column + ' ' + metadataDictionary[ + column] + ' CHECK (TYPEOF(' + column + ') = "integer" OR TYPEOF(' + column + ') = "null"), ' + elif metadataDictionary[column] in ['REAL', 'real', 'Real']: + createDataTableQuery += column + ' ' + metadataDictionary[ + column] + ' CHECK (TYPEOF(' + column + ') = "real" OR TYPEOF(' + column + ') = "integer" OR TYPEOF(' + column + ') = "null"), ' + elif metadataDictionary[column] in ['TEXT', 'text', 'Text']: + createDataTableQuery += column + ' ' + metadataDictionary[ + column] + ' CHECK (TYPEOF(' + column + ') = "text" OR TYPEOF(' + column + ') = "null"), ' # Remove the last comma createDataTableQuery = createDataTableQuery[:-2] createDataTableQuery += ')' @@ -167,11 +177,10 @@ def createDataTable(metadataDictionary, cur): def addCSVInTheDataTable(csvFilePath, metadataDictionary, cur): - # Open the csv - csvFile = open(csvFilePath, 'r') + csvFile = open(csvFilePath, "r", encoding="utf-8") csvReader = csv.reader(csvFile) - + # Create the csv INSERT statement csvHeader = next(csvReader) columnsString = csvHeader[0] @@ -179,30 +188,55 @@ def addCSVInTheDataTable(csvFilePath, metadataDictionary, cur): if column not in metadataDictionary: raise KeyError('Column ' + column + ' does not exist in the metadata!') columnsString += ', ' + column - columnsQuery = 'INSERT INTO DATA (' + columnsString + ') VALUES (' + columnsSectionOfSQLQuery = 'INSERT INTO DATA (' + columnsString + ') VALUES ' # Insert data + numberOfRows = 0 + valuesSectionOfSQLQuery = '(' for row in csvReader: - insertRowQuery = columnsQuery + numberOfRows += 1 for (value, column) in zip(row, csvHeader): if metadataDictionary[column] == 'text': - insertRowQuery += "'" + value + "', " + valuesSectionOfSQLQuery += "'" + value + "', " elif value == '': - insertRowQuery += 'null, ' + valuesSectionOfSQLQuery += 'null, ' else: - insertRowQuery += value + ", " - insertRowQuery = insertRowQuery[:-2] - insertRowQuery += ');' + valuesSectionOfSQLQuery += value + ", " + if numberOfRows % int(MAX_ROWS_TO_INSERT_INTO_SQL) == 0: + valuesSectionOfSQLQuery = valuesSectionOfSQLQuery[:-2] + valuesSectionOfSQLQuery += ');' + + try: + cur.execute(columnsSectionOfSQLQuery + valuesSectionOfSQLQuery) + except: + findErrorOnBulkInsertQuery(cur, valuesSectionOfSQLQuery, csvHeader, metadataDictionary, csvFilePath) + raise ValueError("Error inserting the CSV to the database.") + valuesSectionOfSQLQuery = '(' + else: + valuesSectionOfSQLQuery = valuesSectionOfSQLQuery[:-2] + valuesSectionOfSQLQuery += '),(' + + if numberOfRows % 
int(MAX_ROWS_TO_INSERT_INTO_SQL) != 0: + valuesSectionOfSQLQuery = valuesSectionOfSQLQuery[:-3] + valuesSectionOfSQLQuery += ');' try: - cur.execute(insertRowQuery) + cur.execute(columnsSectionOfSQLQuery + valuesSectionOfSQLQuery) except: - findErrorOnSqlQuery(cur, row, csvHeader, metadataDictionary, csvFilePath) - raise ValueError('Row: ' + str(row) + ', Query: ' + str(insertRowQuery) + ', could not be inserted in the database.') + findErrorOnBulkInsertQuery(cur, valuesSectionOfSQLQuery, csvHeader, metadataDictionary, csvFilePath) -def findErrorOnSqlQuery(cur, row, csvHeader, metadataDictionary, csvFilePath): +def findErrorOnBulkInsertQuery(cur, valuesOfQuery, csvHeader, metadataDictionary, csvFilePath): + # Removing the first and last parenthesis + valuesOfQuery = valuesOfQuery[1:-2] + # Removing the ' from character values + valuesOfQuery = valuesOfQuery.replace("\'", "") + # Call findErrorOnSqlQuery for each row in the bulk query + for row in valuesOfQuery.split('),('): + findErrorOnSqlQuery(cur, row.split(','), csvHeader, metadataDictionary, csvFilePath) + +def findErrorOnSqlQuery(cur, row, csvHeader, metadataDictionary, csvFilePath): # Insert the code column into the database and then update it for each row to find where the problem is firstRow = True @@ -213,7 +247,7 @@ def findErrorOnSqlQuery(cur, row, csvHeader, metadataDictionary, csvFilePath): insertQuery = "INSERT INTO DATA (subjectcode) VALUES ('" + value + "');" cur.execute(insertQuery) continue; - + if metadataDictionary[column] == 'text': updateQuery = "UPDATE DATA SET " + column + " = '" + value + "' WHERE subjectcode = '" + code + "';"; elif value == '': @@ -224,56 +258,58 @@ def findErrorOnSqlQuery(cur, row, csvHeader, metadataDictionary, csvFilePath): try: cur.execute(updateQuery) except: - raise ValueError("Error inserting into the database. Could not insert value: '" + value + "', into column: '" + column + "', at row with subjectcode: " + code + ", while inserting csv: " + csvFilePath) - + raise ValueError( + "Error inserting into the database. Could not insert value: '" + value + "', into column: '" + column + "', at row with subjectcode: " + code + ", while inserting csv: " + csvFilePath) def main(): - # Read the parameters parser = ArgumentParser() parser.add_argument('-f', '--pathologiesFolderPath', required=True, help='The folder with the pathologies data.') - parser.add_argument('-t', '--nodeType', required=True, - help='Is this a master or a worker node?' + parser.add_argument('-p', '--pathologies', required=False, + help='Specific pathologies to parse. 
(Example: "dementia,tbi")'
                        )
     args = parser.parse_args()
 
-    pathologiesFolderPath = os.path.abspath(args.pathologiesFolderPath)
-
+    pathologiesFolderPath = os.path.abspath(args.pathologiesFolderPath)
+
     # Get all pathologies
     pathologiesList = next(os.walk(pathologiesFolderPath))[1]
-
+
+    if args.pathologies is not None:
+        pathologiesToConvert = args.pathologies.split(",")
+        pathologiesList = list(set(pathologiesList) & set(pathologiesToConvert))
+    print("Converting CSVs for pathologies: " + ",".join(pathologiesList))
+
     # Create the datasets db for each pathology
     for pathologyName in pathologiesList:
-
+
         # Initializing metadata and output absolute path
-        CDEsMetadataPath = os.path.join(pathologiesFolderPath,pathologyName,"CDEsMetadata.json")
-        outputDBAbsPath = os.path.join(pathologiesFolderPath,pathologyName,"datasets.db")
+        CDEsMetadataPath = os.path.join(pathologiesFolderPath, pathologyName, "CDEsMetadata.json")
+        outputDBAbsPath = os.path.join(pathologiesFolderPath, pathologyName, "datasets.db")
 
         # Connect to the database
         con = sqlite3.connect(outputDBAbsPath)
         cur = con.cursor()
-
+
         # Add the metadata table + rows
         addMetadataInTheDatabase(CDEsMetadataPath, cur)
-
+
         # Transform the metadata json into a column name -> column type list
         metadataDictionary = createMetadataDictionary(CDEsMetadataPath)
-
+
         # Create the data table with the header
         createDataTable(metadataDictionary, cur)
-
+
         # Add all the csvs in the database
-        for csv in os.listdir(os.path.join(pathologiesFolderPath,pathologyName)):
+        for csv in os.listdir(os.path.join(pathologiesFolderPath, pathologyName)):
             if csv.endswith('.csv'):
-                csvFilePath = os.path.join(pathologiesFolderPath,pathologyName,csv)
+                csvFilePath = os.path.join(pathologiesFolderPath, pathologyName, csv)
                 addCSVInTheDataTable(csvFilePath, metadataDictionary, cur)
-
-
+
         con.commit()
         con.close()
 
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()
diff --git a/Exareme-Docker/files/root/exareme/exareme-admin.sh b/Exareme-Docker/files/root/exareme/exareme-admin.sh
index b0566d2b08f0d18307928284d12e28e9c2f47258..0762543c043c83cafbecccec32b3e4b0682081bd 100755
--- a/Exareme-Docker/files/root/exareme/exareme-admin.sh
+++ b/Exareme-Docker/files/root/exareme/exareme-admin.sh
@@ -13,15 +13,13 @@ if [[ -z ${EXAREME_HOME} ]]; then
         export EXAREME_HOME="$(pwd)";
     fi
 fi
-echo "EXAREME HOME DIR: $EXAREME_HOME";
 
 #load environmental variables like JAVA, python
 . ./exareme-env.sh  &> /dev/null
 
-#maybe simply pass MASTER_IP from bootstrap
-EXAREME_MASTER=`/sbin/ifconfig eth0 | grep "inet" | awk -F: '{print $2}' | cut -d ' ' -f 1`;
-echo "EXAREME_HOST : $EXAREME_MASTER";
-echo "EXAREME_USER: $EXAREME_USER";
+# Getting the IP and removing white spaces
+EXAREME_MASTER=$(hostname -i | sed 's/ *$//g')
+
 ####################################################################################################
 # parse command line arguments
 ####################################################################################################
@@ -106,18 +104,12 @@ function start_exareme(){	#Starts exareme daemon
         -Dcom.sun.management.jmxremote.ssl=false \
         -Djava.security.egd=file:///dev/urandom "
 
-    DESC="exareme-master"
     EXAREME_ADMIN_CLASS=${EXAREME_ADMIN_MASTER_CLASS}
 
-    echo ${EXAREME_ADMIN_CLASS_PATH}
-    echo ${EXAREME_JAVA}
-    echo ${EXAREME_ADMIN_CLASS}
-    echo ${EXAREME_MASTER}
-
    mkdir -p /tmp/exareme/var/log /tmp/exareme/var/run
 
    $EXAREME_JAVA -cp $EXAREME_ADMIN_CLASS_PATH \
-        $EXAREME_ADMIN_OPTS $EXAREME_ADMIN_CLASS > /var/log/exareme.log 2>&1 & echo $!
> /tmp/exareme/var/run/$DESC.pid #-cp requires class path specification + $EXAREME_ADMIN_OPTS $EXAREME_ADMIN_CLASS > /var/log/exareme.log 2>&1 & echo $! > /tmp/exareme/var/run/exareme-master.pid #-cp requires class path specification exit 0 diff --git a/Exareme-Docker/files/root/exareme/set-local-datasets.sh b/Exareme-Docker/files/root/exareme/set-local-datasets.sh index 7e90cdce2a1e0d8ef32155efe274d9791e75ad93..5281815838fe4f04ad2901e5b92f845a04316d4c 100755 --- a/Exareme-Docker/files/root/exareme/set-local-datasets.sh +++ b/Exareme-Docker/files/root/exareme/set-local-datasets.sh @@ -13,7 +13,7 @@ do pathology=$(basename ${PATHOLOGY}) - curl -s -X PUT -d @- ${CONSULURL}/v1/kv/${DATA}/${NODE_NAME}/${pathology} <<< ${PATHOLOGY_DATASETS} + curl -s -X PUT -d @- ${CONSULURL}/v1/kv/${CONSUL_DATA_PATH}/${NODE_NAME}/${pathology} <<< ${PATHOLOGY_DATASETS} PATHOLOGY_DATASETS='' fi diff --git a/Exareme-Docker/files/root/exareme/start-worker.sh b/Exareme-Docker/files/root/exareme/start-worker.sh index f8f005c84a540e0ed192240fb315b7a184dff1d4..ec09e756820dc904b61eecc2347fc853ebda82ef 100755 --- a/Exareme-Docker/files/root/exareme/start-worker.sh +++ b/Exareme-Docker/files/root/exareme/start-worker.sh @@ -10,7 +10,7 @@ EXAREME_ADMIN_WORKER_CLASS="madgik.exareme.worker.admin.StartWorker" EXAREME_ADMIN_OPTS="${EXAREME_JAVA_OPTS} \ -Djava.rmi.server.codebase=file:$EXAREME_HOME/lib/exareme/ \ -Djava.security.policy=$EXAREME_HOME/etc/exareme/art.policy\ - -Djava.rmi.server.hostname=$MY_IP \ + -Djava.rmi.server.hostname=$NODE_IP \ -Dsun.rmi.activation.execTimeout=$NODE_COMMUNICATION_TIMEOUT \ -Dsun.rmi.activation.groupTimeout=$NODE_COMMUNICATION_TIMEOUT \ -Dsun.rmi.dgc.ackTimeout=$NODE_COMMUNICATION_TIMEOUT \ @@ -20,22 +20,14 @@ EXAREME_ADMIN_OPTS="${EXAREME_JAVA_OPTS} \ -Dcom.sun.management.jmxremote.ssl=false \ -Djava.security.egd=file:///dev/urandom " -DESC="exareme-worker" - EXAREME_ADMIN_CLASS=${EXAREME_ADMIN_WORKER_CLASS} EXAREME_ADMIN_CLASS_ARGS=${MASTER_IP} -echo ${EXAREME_ADMIN_CLASS_PATH} -echo ${EXAREME_JAVA} -echo ${EXAREME_ADMIN_CLASS} -echo ${EXAREME_ADMIN_CLASS_ARGS} - - mkdir -p /tmp/exareme/var/log /tmp/exareme/var/run ${EXAREME_JAVA} -cp ${EXAREME_ADMIN_CLASS_PATH} \ ${EXAREME_ADMIN_OPTS} ${EXAREME_ADMIN_CLASS} \ -${EXAREME_ADMIN_CLASS_ARGS} > /var/log/exareme.log 2>&1 & echo $! > /tmp/exareme/var/run/${DESC}.pid +${EXAREME_ADMIN_CLASS_ARGS} > /var/log/exareme.log 2>&1 & echo $! > /tmp/exareme/var/run/exareme-worker.pid -echo "${DESC} started." +echo "Worker started." 
diff --git a/Exareme-Docker/src/exareme/exareme-master/pom.xml b/Exareme-Docker/src/exareme/exareme-master/pom.xml index 0f9b7b392e3441b7bba05d31a9c79cbbd02fd8f2..9432c82b4ea6bcad959172e1617e76627f4563a6 100644 --- a/Exareme-Docker/src/exareme/exareme-master/pom.xml +++ b/Exareme-Docker/src/exareme/exareme-master/pom.xml @@ -159,6 +159,14 @@ </execution> </executions> </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <configuration> + <source>8</source> + <target>8</target> + </configuration> + </plugin> </plugins> </build> </project> diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/admin/StartMaster.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/admin/StartMaster.java index 7385d7504d4e4fb0987d7d1cf21c2d756c17d5e9..71064ca76383a10962c793fae4f763894b1e3a7c 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/admin/StartMaster.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/admin/StartMaster.java @@ -38,7 +38,7 @@ public class StartMaster { int registryPort = AdpProperties.getArtProps().getInt("art.registry.rmi.defaultPort"); int dataTransferPort = AdpProperties.getArtProps().getInt("art.container.data.port"); - String logLevel = AdpProperties.getArtProps().getString("art.log.level"); + String logLevel = System.getenv("LOG_LEVEL"); Logger.getRootLogger().setLevel(Level.toLevel(logLevel)); try { diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/client/AdpDBClientQueryStatus.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/client/AdpDBClientQueryStatus.java index 14c9f40019b7b52ce5d8ffb1286d11f894c9c1be..4010a06f1e7aa9d903bd8a87960293b2e62c1175 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/client/AdpDBClientQueryStatus.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/client/AdpDBClientQueryStatus.java @@ -28,8 +28,8 @@ public interface AdpDBClientQueryStatus { void registerListener(AdpDBQueryListener listener) throws RemoteException; - InputStream getResult() throws RemoteException; + String getResult() throws RemoteException; - InputStream getResult(DataSerialization ds) throws RemoteException; + String getResult(DataSerialization ds) throws RemoteException; } diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/client/rmi/RmiAdpDBClientQueryStatus.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/client/rmi/RmiAdpDBClientQueryStatus.java index c6193488d208a2986410770f32e0022dba838a72..2e627f04e8a571558c10ac1c72991756316c20fe 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/client/rmi/RmiAdpDBClientQueryStatus.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/client/rmi/RmiAdpDBClientQueryStatus.java @@ -21,7 +21,9 @@ import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.rmi.RemoteException; +import java.util.Arrays; import java.util.HashMap; +import java.util.concurrent.FutureTask; /** * @author alex @@ -35,7 +37,8 @@ public class RmiAdpDBClientQueryStatus implements AdpDBClientQueryStatus { private String lastStatus; private TimeFormat timeF; private boolean finished; - private InputStream result; + 
private boolean error; + private String result; public RmiAdpDBClientQueryStatus(AdpDBQueryID queryId, AdpDBClientProperties properties, AdpDBQueryExecutionPlan plan, AdpDBStatus status) { @@ -45,6 +48,7 @@ public class RmiAdpDBClientQueryStatus implements AdpDBClientQueryStatus { this.lastStatus = null; this.timeF = new TimeFormat(TimeUnit.min); this.finished = false; + this.error = false; result = null; } @@ -61,13 +65,6 @@ public class RmiAdpDBClientQueryStatus implements AdpDBClientQueryStatus { if (!status.hasFinished() && !status.hasError()) return false; - try { - String algorithmResult = IOUtils.toString(getResult(DataSerialization.summary), StandardCharsets.UTF_8); - log.info("Algorithm with queryId" + getQueryID() + " terminated. Result: \n " + algorithmResult); - } catch (IOException e) { - log.error("Could not read the algorithm result table." + getQueryID()); - } - finished = true; return true; } @@ -113,7 +110,7 @@ public class RmiAdpDBClientQueryStatus implements AdpDBClientQueryStatus { } @Override - public InputStream getResult() throws RemoteException { + public String getResult() throws RemoteException { return getResult(DataSerialization.ldjson); } @@ -129,14 +126,27 @@ public class RmiAdpDBClientQueryStatus implements AdpDBClientQueryStatus { * @throws RemoteException */ @Override - public InputStream getResult(DataSerialization ds) throws RemoteException { + public String getResult(DataSerialization ds) throws RemoteException { // The registry should be updated the 1st time we fetch a result stream. if (result == null) { updateRegistry(); } - result = new RmiAdpDBClient(AdpDBManagerLocator.getDBManager(), properties) + InputStream resultStream = new RmiAdpDBClient(AdpDBManagerLocator.getDBManager(), properties) .readTable(plan.getResultTables().get(0).getName(), ds); + + FutureTask<String> getResultFromStream; + try { + getResultFromStream = new FutureTask<>(() -> + IOUtils.toString(resultStream, StandardCharsets.UTF_8)); + + new Thread(getResultFromStream).start(); + result = getResultFromStream.get(30, java.util.concurrent.TimeUnit.SECONDS); + } catch (Exception e) { + log.error("Error reading the result table! 
QueryID:" + status.getQueryID().getQueryID(), e); + throw new RemoteException("Could not read the result table!"); + } + return result; } diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/executor/remote/AdpDBArtJobMonitor.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/executor/remote/AdpDBArtJobMonitor.java index f72834358d4e04c2ae0925f130a32f636358284b..393f7ea87bbd1ac08ca8864cdd716f19d8931612 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/executor/remote/AdpDBArtJobMonitor.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/executor/remote/AdpDBArtJobMonitor.java @@ -59,18 +59,11 @@ public class AdpDBArtJobMonitor implements Runnable { statusManager.getStatistics(status.getId()).setTotalOperators(stats.getTotalProc()); statusManager.getStatistics(status.getId()).setTotalDataTransfers(stats.getTotalData()); - - while (sessionManager.hasFinished() == false && sessionManager.hasError() == false) { - - Thread.sleep(100 * statsUpdateSecs); + while (!sessionManager.hasFinished() && !sessionManager.hasError()) { boolean updateProgressStatistics = updateProgressStatistics(); - sessionManager = sessionPlan.getPlanSessionStatusManagerProxy(); - statsManager = sessionPlan.getPlanSessionStatisticsManagerProxy(); - if (sessionManager == null || statsManager == null) { - log.info("--+ error"); - } if (updateProgressStatistics) { - log.info("Session is running..."); + log.info("Session is updating... ID: " + sessionPlan.getSessionID().getLongId() + + " , QueryID: " + queryID.getQueryID()); log.debug("Update listeners ..."); synchronized (listeners) { for (AdpDBQueryListener l : listeners) { @@ -80,23 +73,38 @@ public class AdpDBArtJobMonitor implements Runnable { } } + Thread.sleep(100 * statsUpdateSecs); + // Reload the managers + sessionManager = sessionPlan.getPlanSessionStatusManagerProxy(); + statsManager = sessionPlan.getPlanSessionStatisticsManagerProxy(); + if (sessionManager == null || statsManager == null) { + log.error("Session Manager or stats Manager null! " + sessionManager + ", " + statsManager); + } } + updateProgressStatistics(); statusManager.getStatistics(status.getId()) .setAdpEngineStatistics(statsManager.getStatistics()); - if (sessionManager != null && sessionManager.hasError() == false) { + if (sessionManager != null && !sessionManager.hasError()) { + log.info("Session finished, closing! ID: " + sessionPlan.getSessionID().getLongId() + + " , QueryID: " + queryID.getQueryID()); statusManager.setFinished(status.getId()); } else { + log.info("Session error! ID: " + sessionPlan.getSessionID().getLongId() + + " , QueryID: " + queryID.getQueryID()); statusManager.setError(status.getId(), sessionManager.getErrorList().get(0)); } + log.debug("Session closing! 
ID: " + sessionPlan.getSessionID().getLongId() + + " , QueryID: " + queryID.getQueryID()); sessionPlan.close(); + } catch (Exception e) { statusManager.setError(status.getId(), e); - log.error("Cannot monitor job!", e); + log.error("Cannot monitor job, sessionID: " + sessionPlan.getSessionID().getLongId()); + log.error("Cannot monitor job, queryID: " + status.getQueryID().getQueryID(), e); } finally { - log.debug("Terminate listeners ( " + listeners.size() + ")..."); synchronized (listeners) { for (AdpDBQueryListener l : listeners) { l.terminated(queryID, status); @@ -133,7 +141,7 @@ public class AdpDBArtJobMonitor implements Runnable { statsOldOP = operatorsCompleted; return true; } - } catch (UnmarshalException _) { + } catch (UnmarshalException e) { log.error("Cannot decode information ..."); } return false; diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/executor/remote/AdpDBExecutorRemote.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/executor/remote/AdpDBExecutorRemote.java index b0bd57fb4264281080ee153ed7026665be9b27da..513297fe2548ce18514c903b4c4d3a2e69f346cd 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/executor/remote/AdpDBExecutorRemote.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/executor/remote/AdpDBExecutorRemote.java @@ -149,7 +149,6 @@ public class AdpDBExecutorRemote implements AdpDBExecutor { AdpDBArtJobMonitor monitor = new AdpDBArtJobMonitor(sessionPlan, status, statusManager, execPlan.getQueryID()); monitors.put(execPlan.getQueryID(), monitor); - executor.submit(monitor); statusArray.add(status); return status; diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/executor/remote/operator/process/ExecuteSelect.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/executor/remote/operator/process/ExecuteSelect.java index 8a7247080c36d36e43874624275cec8659ac5dcc..546e607500ce13868e91ea74ddb9b606d9ac635c 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/executor/remote/operator/process/ExecuteSelect.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/executor/remote/operator/process/ExecuteSelect.java @@ -57,7 +57,7 @@ public class ExecuteSelect extends AbstractMiMo { log.debug("Skip saving tables (" + dbOp.getQuery().getOutputTable().getTable().getName() + ") ... 
"); } - log.info("Currently executing: \n " + state.toString()); + log.debug("Currently executing: \n " + state.toString()); exit(0, state.getExitMessage()); } } diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/handler/IterationsHandler.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/handler/IterationsHandler.java index c668fd4274513c369758611d0364d60000453594..4e968ac2a1bec0b8ef19675f69ee15a0daed707e 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/handler/IterationsHandler.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/handler/IterationsHandler.java @@ -10,9 +10,10 @@ import madgik.exareme.master.engine.iterations.scheduler.IterationsScheduler; import madgik.exareme.master.engine.iterations.state.IterationsStateManager; import madgik.exareme.master.engine.iterations.state.IterationsStateManagerImpl; import madgik.exareme.master.engine.iterations.state.IterativeAlgorithmState; -import madgik.exareme.master.queryProcessor.composer.AlgorithmProperties; -import madgik.exareme.master.queryProcessor.composer.Composer; -import madgik.exareme.master.queryProcessor.composer.Exceptions.ComposerException; +import madgik.exareme.master.gateway.async.handler.HBP.Exceptions.BadUserInputException; +import madgik.exareme.master.queryProcessor.HBP.AlgorithmProperties; +import madgik.exareme.master.queryProcessor.HBP.Composer; +import madgik.exareme.master.queryProcessor.HBP.Exceptions.ComposerException; import madgik.exareme.worker.art.container.ContainerProxy; import org.apache.log4j.Logger; @@ -65,7 +66,7 @@ public class IterationsHandler { */ public IterativeAlgorithmState handleNewIterativeAlgorithmRequest( AdpDBManager adpDBManager, String algorithmKey, - AlgorithmProperties algorithmProperties, ContainerProxy[] usedContainerProxies) { + AlgorithmProperties algorithmProperties, ContainerProxy[] usedContainerProxies) throws BadUserInputException { // Generate the AdpDBClient for this iterative algorithm that will be used to execute all the phases' queries AdpDBClient adpDBClient; diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/handler/IterationsHandlerUtils.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/handler/IterationsHandlerUtils.java index c8721d56c6d41497953e47b0cabfccbd7126ea27..7b56c59d12b25ed345acb900fac72ff50549e8fb 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/handler/IterationsHandlerUtils.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/handler/IterationsHandlerUtils.java @@ -1,7 +1,6 @@ package madgik.exareme.master.engine.iterations.handler; import madgik.exareme.common.consts.HBPConstants; -import madgik.exareme.master.queryProcessor.composer.Algorithms; import org.apache.log4j.Logger; /** diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/handler/NIterativeAlgorithmResultEntity.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/handler/NIterativeAlgorithmResultEntity.java index 75310f95bb95afcf5e13d4985d24023d1d9780fb..04d23ade2c4f6e92244e2dd20b87ab8edac644c2 100644 --- 
a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/handler/NIterativeAlgorithmResultEntity.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/handler/NIterativeAlgorithmResultEntity.java @@ -3,6 +3,7 @@ package madgik.exareme.master.engine.iterations.handler; import madgik.exareme.master.client.AdpDBClientQueryStatus; import madgik.exareme.master.connector.DataSerialization; import madgik.exareme.master.engine.iterations.state.IterativeAlgorithmState; +import madgik.exareme.master.gateway.async.handler.HBP.HBPQueryHelper; import org.apache.http.entity.BasicHttpEntity; import org.apache.http.nio.ContentEncoder; import org.apache.http.nio.IOControl; @@ -46,11 +47,6 @@ public class NIterativeAlgorithmResultEntity extends BasicHttpEntity this.dataSerialization = dataSerialization; } - private final static String user_error = new String("text/plain+user_error"); - private final static String error = new String("text/plain+error"); - private final static String warning = new String("text/plain+warning"); - - /** * @param encoder is used to save the output * @param ioctrl will be used from the iterativeAlgorithmState, when the algorithm is complete, @@ -86,9 +82,11 @@ public class NIterativeAlgorithmResultEntity extends BasicHttpEntity if (!finalizeQueryStatus.hasError() && finalizeQueryStatus.hasFinished()) { if (channel == null) { + String result = iterativeAlgorithmState.getAdpDBClientQueryStatus().getResult(dataSerialization); + log.info("Iterative algorithm with key " + iterativeAlgorithmState.getAlgorithmKey() + + " terminated. Result: \n " + result); channel = Channels.newChannel( - iterativeAlgorithmState.getAdpDBClientQueryStatus() - .getResult(dataSerialization)); + new ByteArrayInputStream(result.getBytes(StandardCharsets.UTF_8))); } // Reading from the channel to the buffer, flip is required by the API channel.read(buffer); @@ -98,11 +96,15 @@ public class NIterativeAlgorithmResultEntity extends BasicHttpEntity this.buffer.compact(); if (i < 1 && !buffering) { encoder.complete(); + closeQuery(); + close(); } } else { encoder.write(ByteBuffer.wrap( finalizeQueryStatus.getError().getBytes())); encoder.complete(); + closeQuery(); + close(); } } else { // Algorithm execution failed, notify the client. 
@@ -124,34 +126,35 @@ public class NIterativeAlgorithmResultEntity extends BasicHttpEntity String result = iterativeAlgorithmState.getAlgorithmError(); if (result.contains("ExaremeError:")) { String data = result.substring(result.lastIndexOf("ExaremeError:") + "ExaremeError:".length()).replaceAll("\\s", " "); - String type = user_error; - String output = defaultOutputFormat(data,type); + String type = HBPQueryHelper.ErrorResponse.ErrorResponseTypes.user_error; + String output = HBPQueryHelper.ErrorResponse.createErrorResponse(data, type); logErrorMessage(output); channel = Channels.newChannel( new ByteArrayInputStream(output.getBytes(StandardCharsets.UTF_8))); } else if (result.contains("PrivacyError")) { String data = "The Experiment could not run with the input provided because there are insufficient data."; - String type = warning; - String output = defaultOutputFormat(data,type); + String type = HBPQueryHelper.ErrorResponse.ErrorResponseTypes.warning; + String output = HBPQueryHelper.ErrorResponse.createErrorResponse(data, type); logErrorMessage(output); channel = Channels.newChannel( new ByteArrayInputStream(output.getBytes(StandardCharsets.UTF_8))); } else if (result.matches("java.rmi.RemoteException: Containers:.*not responding")) { String data = "One or more containers are not responding. Please inform the system administrator."; - String type = error; - String output = defaultOutputFormat(data,type); + String type = HBPQueryHelper.ErrorResponse.ErrorResponseTypes.error; + String output = HBPQueryHelper.ErrorResponse.createErrorResponse(data, type); logErrorMessage(output); channel = Channels.newChannel( new ByteArrayInputStream(output.getBytes(StandardCharsets.UTF_8))); } else { // Unexpected error + log.info("Exception from madis: " + result); String data = "Something went wrong with the execution of algorithm: [" + iterativeAlgorithmState.getAlgorithmKey() + "]. 
Please inform your system administrator to consult the logs."; - String type = error; - String output = defaultOutputFormat(data,type); + String type = HBPQueryHelper.ErrorResponse.ErrorResponseTypes.error; + String output = HBPQueryHelper.ErrorResponse.createErrorResponse(data, type); logErrorMessage(output); channel = Channels.newChannel( new ByteArrayInputStream(output.getBytes(StandardCharsets.UTF_8))); @@ -162,6 +165,8 @@ public class NIterativeAlgorithmResultEntity extends BasicHttpEntity encoder.write(buffer); this.buffer.compact(); encoder.complete(); + closeQuery(); + close(); } } finally { if (iterativeAlgorithmState != null) @@ -169,26 +174,28 @@ public class NIterativeAlgorithmResultEntity extends BasicHttpEntity } } - @Override - public void close() throws IOException { + public void closeQuery() throws IOException { if (finalizeQueryStatus != null) { // Case in which algorithm execution failed finalizeQueryStatus.close(); finalizeQueryStatus = null; } + if (iterativeAlgorithmState != null) + iterativeAlgorithmState.releaseLock(); iterativeAlgorithmState = null; } @Override - public boolean isRepeatable() { - return false; + public void close() { + } - private String defaultOutputFormat(String data, String type){ - return "{\"result\" : [{\"data\":"+"\""+data+"\",\"type\":"+"\""+type+"\"}]}"; + @Override + public boolean isRepeatable() { + return false; } - private void logErrorMessage(String error){ + private void logErrorMessage(String error) { log.info("Algorithm exited with error and returned:\n " + error); } } diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/scheduler/events/IterationsEventHandler.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/scheduler/events/IterationsEventHandler.java index 7c28a362bb47238ef2fc43b609607d34b8eabe24..0d9c009f0cb7df8cd33cfd075c05cc8329c208d6 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/scheduler/events/IterationsEventHandler.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/scheduler/events/IterationsEventHandler.java @@ -3,7 +3,6 @@ package madgik.exareme.master.engine.iterations.scheduler.events; import madgik.exareme.common.app.engine.AdpDBQueryID; import madgik.exareme.master.client.AdpDBClientQueryStatus; import madgik.exareme.master.engine.iterations.scheduler.IterationsDispatcher; -import madgik.exareme.master.engine.iterations.scheduler.events.phaseCompletion.PhaseCompletionEventHandler; import madgik.exareme.master.engine.iterations.state.IterationsStateManager; import madgik.exareme.master.engine.iterations.state.IterativeAlgorithmState; import madgik.exareme.utils.eventProcessor.EventHandler; @@ -56,8 +55,9 @@ public abstract class IterationsEventHandler<T extends IterationsEvent> ias.getAlgorithmKey(), dflScript); - log.info("New Iterative phase: " + currentPhase); - log.info("Executing Iterative DFL Script: \n" + dflScript); + log.info("New Iterative phase: " + currentPhase + " for algorithm: " + ias.getAlgorithmKey() + + " with queryID: " + queryStatus.getQueryID().getQueryID()); + log.debug("Executing Iterative DFL Script: \n" + dflScript); ias.setCurrentExecutionPhase(currentPhase); diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/scheduler/events/phaseCompletion/PhaseCompletionEventHandler.java 
b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/scheduler/events/phaseCompletion/PhaseCompletionEventHandler.java index 785bb9bb934573939a08f3810bc52385b342d8d3..7127c098b9691e33d68002ed1057d54bd3bf296c 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/scheduler/events/phaseCompletion/PhaseCompletionEventHandler.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/scheduler/events/phaseCompletion/PhaseCompletionEventHandler.java @@ -109,8 +109,7 @@ public class PhaseCompletionEventHandler extends IterationsEventHandler<PhaseCom String terminationConditionResult; try { - InputStream previousResultStream = ias.getAdpDBClientQueryStatus().getResult(); - terminationConditionResult = IOUtils.toString(previousResultStream, StandardCharsets.UTF_8); + terminationConditionResult = ias.getAdpDBClientQueryStatus().getResult(); } catch (IOException e) { throw new IterationsStateFatalException( "Could not read the termination_condition result table.", ias.getAlgorithmKey()); diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/state/IterativeAlgorithmState.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/state/IterativeAlgorithmState.java index 24f53019f1e4716232ac086d6a081f99d05d062a..0bbd14bb5f68116c7cb0be254b4f191c03c25551 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/state/IterativeAlgorithmState.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/iterations/state/IterativeAlgorithmState.java @@ -5,7 +5,7 @@ import madgik.exareme.master.client.AdpDBClientQueryStatus; import madgik.exareme.master.engine.iterations.handler.IterationsConstants; import madgik.exareme.master.engine.iterations.handler.IterationsHandlerDFLUtils; import madgik.exareme.master.engine.iterations.state.exceptions.IterationsStateFatalException; -import madgik.exareme.master.queryProcessor.composer.AlgorithmProperties; +import madgik.exareme.master.queryProcessor.HBP.AlgorithmProperties; import org.apache.commons.lang3.text.StrSubstitutor; import org.apache.http.nio.IOControl; import org.apache.log4j.Logger; diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/scheduler/AdpDBQueryScheduler.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/scheduler/AdpDBQueryScheduler.java index fd785d00cce46445f46952bb4d22b909d8a80aaf..15036ca9ea375740b6521d7a170dd52881519abe 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/scheduler/AdpDBQueryScheduler.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/scheduler/AdpDBQueryScheduler.java @@ -29,6 +29,7 @@ import madgik.exareme.master.registry.Registry; import madgik.exareme.utils.eventProcessor.EventProcessor; import java.rmi.RemoteException; +import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; /** diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/statusMgr/AdpDBJobSession.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/statusMgr/AdpDBJobSession.java index 
c5de13bf1dc4d807605b4283f36e56d15383b804..9d8d3a67d25315f305c3c12bd662f555659e9f04 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/statusMgr/AdpDBJobSession.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/engine/statusMgr/AdpDBJobSession.java @@ -8,7 +8,6 @@ import madgik.exareme.common.app.engine.AdpDBQueryListener; import madgik.exareme.common.app.engine.AdpDBStatistics; import madgik.exareme.master.engine.AdpDBManagerLocator; import madgik.exareme.worker.art.executionEngine.session.ExecutionEngineSessionPlan; -import org.apache.log4j.Logger; import java.rmi.RemoteException; import java.util.Map; @@ -17,7 +16,6 @@ import java.util.Map; * @author herald */ public class AdpDBJobSession { - private static final Logger log = Logger.getLogger(AdpDBJobSession.class); private boolean finished = false; private boolean error = false; private Exception exception = null; diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/HttpAsyncExaremeGateway.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/HttpAsyncExaremeGateway.java index ed299839ac104ea9f718b64dea3a7efbb3d45d1a..abff0fa84e1b86d7dca9fe356bc30e806cc099e4 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/HttpAsyncExaremeGateway.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/HttpAsyncExaremeGateway.java @@ -4,6 +4,7 @@ import madgik.exareme.master.engine.AdpDBManager; import madgik.exareme.master.gateway.ExaremeGateway; import madgik.exareme.master.gateway.ExaremeGatewayUtils; import madgik.exareme.master.gateway.async.handler.*; +import madgik.exareme.master.gateway.async.handler.HBP.HBPQueryHandler; import madgik.exareme.master.gateway.control.handler.HttpAsyncCheckWorker; import madgik.exareme.master.gateway.control.handler.HttpAsyncRemoveWorkerHandler; import org.apache.http.config.ConnectionConfig; @@ -61,8 +62,8 @@ public class HttpAsyncExaremeGateway implements ExaremeGateway { registry.register(ExaremeGatewayUtils.GW_API_QUERY, new HttpAsyncQueryHandler()); registry.register(ExaremeGatewayUtils.GW_API_TABLE, new HttpAsyncTableHandler()); registry.register(ExaremeGatewayUtils.GW_API_MINING_ALGORITHMS, new HttpAsyncMiningAlgorithmsHandler()); - registry.register(ExaremeGatewayUtils.GW_API_MINING_QUERY, new HttpAsyncMiningQueryHandler()); - registry.register("/v1/mining/*", new HttpAsyncMiningQueryHandler()); + registry.register(ExaremeGatewayUtils.GW_API_MINING_QUERY, new HBPQueryHandler()); + registry.register("/v1/mining/*", new HBPQueryHandler()); final HttpAsyncService handler = new HttpAsyncService(httpproc, null, null, registry, null, null); diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/Exceptions/DatasetsException.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/Exceptions/DatasetsException.java deleted file mode 100644 index 4fd3a815b74506f0e7fb17ce1f2712ae7e54cbbb..0000000000000000000000000000000000000000 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/Exceptions/DatasetsException.java +++ /dev/null @@ -1,7 +0,0 @@ -package madgik.exareme.master.gateway.async.handler.Exceptions; - -public class DatasetsException extends Exception{ - public 
DatasetsException(String message) {
- super(message);
- }
-}
\ No newline at end of file
diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/Exceptions/PathologyException.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/Exceptions/PathologyException.java
deleted file mode 100644
index 1d92e083884c2f3cda959169c379ed1ba6d0ad70..0000000000000000000000000000000000000000
--- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/Exceptions/PathologyException.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package madgik.exareme.master.gateway.async.handler.Exceptions;
-
-public class PathologyException extends Exception{
- public PathologyException(String message) {
- super(message);
- }
-}
diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/ConsulNodesPathologiesAndDatasetsInfo.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/ConsulNodesPathologiesAndDatasetsInfo.java
new file mode 100644
index 0000000000000000000000000000000000000000..62ad422c35e8a016eeb42527ea14f535bc71415a
--- /dev/null
+++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/ConsulNodesPathologiesAndDatasetsInfo.java
@@ -0,0 +1,239 @@
+package madgik.exareme.master.gateway.async.handler.HBP;
+
+import com.google.gson.Gson;
+import com.google.gson.JsonSyntaxException;
+import madgik.exareme.master.gateway.async.handler.HBP.Exceptions.ConsulException;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.util.EntityUtils;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * A class that contains the information of every node.
+ * A map between the node IP and its data (pathologies/datasets).
+ */
+public class ConsulNodesPathologiesAndDatasetsInfo {
+
+ private static final Logger log = Logger.getLogger(ConsulNodesPathologiesAndDatasetsInfo.class);
+
+ private final HashMap<String, NodeData> nodesData;
+ private final HashMap<String, String> nodeIPsToNames;
+ private String masterNodeIP;
+
+ private static final CloseableHttpClient httpClient = HttpClients.createDefault();
+
+ /**
+ * Fetches the node information from CONSUL.
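+ * Keys under a path are listed through the Consul KV HTTP API (GET <CONSULURL>/v1/kv/<path>?keys)
+ * and each key is then resolved to its value with GET <CONSULURL>/v1/kv/<key>?raw,
+ * as implemented by the searchConsulKeys/getConsulData helpers below.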
+ *
+ * @throws ConsulException when a communication problem with consul occurs
+ */
+ ConsulNodesPathologiesAndDatasetsInfo() throws ConsulException {
+ nodesData = new HashMap<>();
+ nodeIPsToNames = new HashMap<>();
+ Gson gson = new Gson();
+ String activeWorkersPathsJson = searchConsulKeys(getConsulActiveWorkersPath());
+ String[] activeWorkersPaths = gson.fromJson(activeWorkersPathsJson, String[].class);
+
+ for (String workerPath : activeWorkersPaths) {
+ String nodeName = workerPath.substring(workerPath.lastIndexOf("/") + 1);
+ String nodeIP = getConsulData(workerPath);
+ nodesData.put(nodeIP, new NodeData(nodeName));
+ nodeIPsToNames.put(nodeIP, nodeName);
+ }
+
+ log.debug("CONSUL DATA");
+ log.debug("Node IPs to Names");
+ for (Map.Entry<String, String> node : nodeIPsToNames.entrySet()) {
+ log.debug("NodeIp: " + node.getKey() + " , NodeName: " + node.getValue());
+ }
+ log.debug("Node Data");
+ for (Map.Entry<String, NodeData> nodeData : nodesData.entrySet()) {
+ log.debug("NodeIp: " + nodeData.getKey() + " , NODEDATA ---> ");
+ for (Map.Entry<String, ArrayList<String>> pathology : nodeData.getValue().pathologyDatasets.entrySet()) {
+ log.debug("Pathology: " + pathology.getKey() + ", Datasets: " + String.join(",", pathology.getValue()));
+ }
+ log.debug("<----- NODE DATA");
+ }
+ }
+
+ public ArrayList<String> getDatasetsOfPathology(String pathology) {
+ ArrayList<String> datasets = new ArrayList<>();
+ for (NodeData nodeData : nodesData.values()) {
+ if (nodeData.pathologyDatasets.containsKey(pathology)) {
+ datasets.addAll(nodeData.pathologyDatasets.get(pathology));
+ }
+ }
+ return datasets;
+ }
+
+ /**
+ * Fetches the node IPs and datasets that include any of the datasets provided.
+ *
+ * @param pathology is used to limit the search
+ * @param datasets to search for in nodes
+ * @return the node IPs and datasets
+ */
+ public HashMap<String, ArrayList<String>> getNodeDatasets(String pathology, ArrayList<String> datasets) {
+ HashMap<String, ArrayList<String>> nodeIPToDatasets = new HashMap<>();
+
+ for (Map.Entry<String, NodeData> nodeData : nodesData.entrySet()) {
+
+ // Get the datasets for the specified pathology only
+ ArrayList<String> nodeDatasets = nodeData.getValue().pathologyDatasets.get(pathology);
+
+ // Skip nodes without datasets on that pathology
+ if (nodeDatasets == null) continue;
+
+ // If the nodeDatasets contains any of the required datasets
+ if (!Collections.disjoint(nodeDatasets, datasets)) {
+ nodeIPToDatasets.put(nodeData.getKey(), nodeDatasets);
+ }
+ }
+ return nodeIPToDatasets;
+ }
+
+ public String getNodeName(String nodeIP) {
+ return nodeIPsToNames.get(nodeIP);
+ }
+
+ /**
+ * Fetches all the available pathologies in the nodes
+ *
+ * @return the pathologies
+ */
+ public ArrayList<String> getAllAvailablePathologies() {
+ ArrayList<String> nodesPathologies = new ArrayList<>();
+ for (NodeData nodeData : nodesData.values()) {
+ nodesPathologies.addAll(nodeData.pathologyDatasets.keySet());
+ }
+ return nodesPathologies;
+ }
+
+ /**
+ * Fetches the master node's IP only if not already fetched
+ *
+ * @return master node's IP
+ */
+ public String getMasterNodeIP() throws ConsulException {
+ if (masterNodeIP != null) {
+ return masterNodeIP;
+ }
+
+ Gson gson = new Gson();
+ String masterPathJson = searchConsulKeys(getConsulMasterPath());
+ String masterPath = gson.fromJson(masterPathJson, String[].class)[0];
+ masterNodeIP = getConsulData(masterPath);
+ return masterNodeIP;
+ }
+
+ /**
+ * A class that contains the information of a single node.
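+ * It holds the mapping described below; a hypothetical example:
+ * { "pathologyA" : ["dataset1", "dataset2"] }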
+ * A map between the pathologies of the node and the + * datasets in each pathology. + */ + public static class NodeData { + public HashMap<String, ArrayList<String>> pathologyDatasets; + + /** + * Fetches the node information from CONSUL. + * + * @param nodeName the name of the node in CONSUL + * @throws ConsulException when a communication problem with consul occurs + */ + NodeData(String nodeName) throws ConsulException { + pathologyDatasets = new HashMap<>(); + + // Get the available pathologies of the node from CONSUL. + Gson gson = new Gson(); + try { + String nodePathologiesPathsJson = searchConsulKeys(getConsulDataPath() + "/" + nodeName); + if (nodePathologiesPathsJson == null) { + return; + } + String[] nodePathologiesPaths = gson.fromJson(nodePathologiesPathsJson, String[].class); + + // Get the available datasets for each pathology and add it to the hash. + for (String nodePathologyPath : nodePathologiesPaths) { + String pathology = nodePathologyPath.substring(nodePathologyPath.lastIndexOf("/") + 1); + String nodePathologyDatasetsJson = getConsulData(nodePathologyPath); + String[] nodePathologyDatasets = gson.fromJson(nodePathologyDatasetsJson, String[].class); + pathologyDatasets.put(pathology, new ArrayList<>(Arrays.asList(nodePathologyDatasets))); + } + } catch (JsonSyntaxException e) { + throw new ConsulException("There was a problem parsing the response from consul: " + e.getMessage()); + } catch (ConsulException e) { + // The node is up but the data are not added yet. + // continue; + } + } + } + + + /*** ----- Helper functions ----- ***/ + private static String searchConsulKeys(String query) throws ConsulException { + return searchConsul(query + "?keys"); + } + + private static String getConsulData(String query) throws ConsulException { + return searchConsul(query + "?raw"); + } + + private static String searchConsul(String query) throws ConsulException { + log.debug("Consul Query: " + query); + + String consulURL = getConsulUrl(); + HttpGet request = new HttpGet(consulURL + "/v1/kv/" + query); + try { + CloseableHttpResponse response = httpClient.execute(request); + if (response.getStatusLine().getStatusCode() != 200) { + log.error("Failed consul query: " + consulURL + "/v1/kv/" + query); + throw new ConsulException( + "There was an error contacting consul. StatusCode: " + response.getStatusLine().getStatusCode()); + } + return EntityUtils.toString(response.getEntity()); + + } catch (IOException e) { + log.error("Failed consul query: " + consulURL + "/v1/kv/" + query); + throw new ConsulException( + "An exception occurred while contacting Consul. 
Exception: " + e.getMessage()); + } finally { + request.releaseConnection(); + } + } + + private static String getConsulUrl() throws ConsulException { + String consulURL = System.getenv("CONSULURL"); + if (consulURL == null) throw new ConsulException("CONSULURL environment variable is not set."); + + if (!consulURL.startsWith("http://")) { + consulURL = "http://" + consulURL; + } + + return consulURL; + } + + private static String getConsulDataPath() throws ConsulException { + String dataPath = System.getenv("CONSUL_DATA_PATH"); + if (dataPath == null) throw new ConsulException("CONSUL_DATA_PATH environment variable is not set."); + return dataPath; + } + + private static String getConsulActiveWorkersPath() throws ConsulException { + String activeWorkersPath = System.getenv("CONSUL_ACTIVE_WORKERS_PATH"); + if (activeWorkersPath == null) throw new ConsulException("CONSUL_DATA_PATH environment variable is not set."); + return activeWorkersPath; + } + + private static String getConsulMasterPath() throws ConsulException { + String masterPath = System.getenv("CONSUL_MASTER_PATH"); + if (masterPath == null) throw new ConsulException("CONSUL_MASTER_PATH environment variable is not set."); + return masterPath; + } + + +} diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/Exceptions/BadUserInputException.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/Exceptions/BadUserInputException.java new file mode 100644 index 0000000000000000000000000000000000000000..8f3b0b7cf9477480ac73b7d80d5e741cef4b6716 --- /dev/null +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/Exceptions/BadUserInputException.java @@ -0,0 +1,7 @@ +package madgik.exareme.master.gateway.async.handler.HBP.Exceptions; + +public class BadUserInputException extends Exception{ + public BadUserInputException(String message) { + super(message); + } +} \ No newline at end of file diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/Exceptions/ConsulException.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/Exceptions/ConsulException.java new file mode 100644 index 0000000000000000000000000000000000000000..b358a143471a3e95f000c051fefc392a8018bf6a --- /dev/null +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/Exceptions/ConsulException.java @@ -0,0 +1,7 @@ +package madgik.exareme.master.gateway.async.handler.HBP.Exceptions; + +public class ConsulException extends Exception{ + public ConsulException(String message) { + super(message); + } +} \ No newline at end of file diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/Exceptions/RequestException.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/Exceptions/RequestException.java new file mode 100644 index 0000000000000000000000000000000000000000..cecd101d1f412c86afb55b5f03365f7712a8db0d --- /dev/null +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/Exceptions/RequestException.java @@ -0,0 +1,7 @@ +package madgik.exareme.master.gateway.async.handler.HBP.Exceptions; + +public class RequestException extends Exception { + public 
RequestException(String algorithmName, String message) { + super(message + " Algorithm: " + algorithmName); + } +} \ No newline at end of file diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/HBPQueryConstants.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/HBPQueryConstants.java new file mode 100644 index 0000000000000000000000000000000000000000..9e9516b03b8df66967144b737b832b83634fc5cf --- /dev/null +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/HBPQueryConstants.java @@ -0,0 +1,15 @@ +package madgik.exareme.master.gateway.async.handler.HBP; + +public class HBPQueryConstants { + public static String pathologyXNotAvailable = "Pathology %s is not available."; + public static String pathologyNotProvided = "Please provide a pathology."; + public static String datasetXDoesNotExistInPathologyY = "Dataset(s) %s does not exist in pathology %s."; + + public static String datasetsXYZAreInactive = + "The following datasets %s are currently unavailable. Please try again later."; + + public static String nodesUnavailable = "Some nodes are unavailable. Please try again later."; + + public static String serverErrorOccurred = + "Something went wrong. Please consult the system administrator or try again later."; +} diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/HBPQueryHandler.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/HBPQueryHandler.java new file mode 100644 index 0000000000000000000000000000000000000000..8778a13689f036ae423f216d55a0475dc1f550ba --- /dev/null +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/HBPQueryHandler.java @@ -0,0 +1,239 @@ +package madgik.exareme.master.gateway.async.handler.HBP; + +import madgik.exareme.common.consts.HBPConstants; +import madgik.exareme.master.client.AdpDBClient; +import madgik.exareme.master.client.AdpDBClientFactory; +import madgik.exareme.master.client.AdpDBClientProperties; +import madgik.exareme.master.client.AdpDBClientQueryStatus; +import madgik.exareme.master.connector.DataSerialization; +import madgik.exareme.master.engine.AdpDBManager; +import madgik.exareme.master.engine.AdpDBManagerLocator; +import madgik.exareme.master.engine.iterations.exceptions.IterationsFatalException; +import madgik.exareme.master.engine.iterations.handler.IterationsHandler; +import madgik.exareme.master.engine.iterations.handler.NIterativeAlgorithmResultEntity; +import madgik.exareme.master.engine.iterations.state.IterativeAlgorithmState; +import madgik.exareme.master.gateway.ExaremeGatewayUtils; +import madgik.exareme.master.gateway.async.handler.HBP.Exceptions.BadUserInputException; +import madgik.exareme.master.gateway.async.handler.HBP.Exceptions.RequestException; +import madgik.exareme.master.gateway.async.handler.entity.NQueryResultEntity; +import madgik.exareme.master.queryProcessor.HBP.AlgorithmProperties; +import madgik.exareme.master.queryProcessor.HBP.Algorithms; +import madgik.exareme.master.queryProcessor.HBP.Composer; +import madgik.exareme.worker.art.container.ContainerProxy; +import org.apache.http.*; +import org.apache.http.entity.BasicHttpEntity; +import org.apache.http.entity.ContentType; +import org.apache.http.nio.protocol.*; +import org.apache.http.protocol.HttpContext; 
+import org.apache.log4j.Logger;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import static madgik.exareme.master.gateway.GatewayConstants.COOKIE_ALGORITHM_EXECUTION_ID;
+import static madgik.exareme.master.gateway.async.handler.HBP.HBPQueryConstants.serverErrorOccurred;
+
+public class HBPQueryHandler implements HttpAsyncRequestHandler<HttpRequest> {
+
+ private static final Logger log = Logger.getLogger(HBPQueryHandler.class);
+ private static final String SET_COOKIE_HEADER_NAME = "Set-Cookie";
+ private static final AdpDBManager manager = AdpDBManagerLocator.getDBManager();
+ private static final IterationsHandler iterationsHandler = IterationsHandler.getInstance();
+
+ public HBPQueryHandler() {
+ }
+
+ @Override
+ public HttpAsyncRequestConsumer<HttpRequest> processRequest(HttpRequest request,
+ HttpContext context) {
+
+ return new BasicAsyncRequestConsumer();
+ }
+
+ @Override
+ public void handle(HttpRequest request, HttpAsyncExchange httpExchange, HttpContext context)
+ throws HttpException, IOException {
+
+ HttpResponse response = httpExchange.getResponse();
+ response.setHeader("Content-Type", String.valueOf(ContentType.APPLICATION_JSON));
+
+ // During testing, the Set-Cookie header is used with the "algorithm execution id"
+ // parameter to differentiate between concurrent executions of algorithms.
+ if (request.containsHeader(SET_COOKIE_HEADER_NAME)) {
+ HeaderIterator it = request.headerIterator(SET_COOKIE_HEADER_NAME);
+
+ // Parse "algorithm execution id" cookie
+ StringBuilder echoCookieContent = new StringBuilder();
+ while (it.hasNext()) {
+ echoCookieContent.append(it.next());
+ }
+
+ String cookieContentStr = echoCookieContent.toString();
+ if (!cookieContentStr.isEmpty() &&
+ cookieContentStr.contains(COOKIE_ALGORITHM_EXECUTION_ID)) {
+
+ String algorithmExecIdStr =
+ cookieContentStr.substring(cookieContentStr.indexOf(" ")).split("=")[1];
+
+ response.addHeader(SET_COOKIE_HEADER_NAME,
+ COOKIE_ALGORITHM_EXECUTION_ID + "=" + algorithmExecIdStr);
+ }
+ }
+ try {
+ handleHBPAlgorithmExecution(request, response);
+ } catch (Exception e) {
+ log.error(e.getMessage());
+ response.setStatusCode(HttpStatus.SC_BAD_REQUEST);
+ String errorType = HBPQueryHelper.ErrorResponse.ErrorResponseTypes.user_error;
+ response.setEntity(createErrorResponseEntity(e.getMessage(), errorType));
+ }
+ httpExchange.submitResponse(new BasicAsyncResponseProducer(response));
+ }
+
+ private void handleHBPAlgorithmExecution(HttpRequest request, HttpResponse response) {
+
+ try {
+ preExecutionChecks(request);
+
+ String algorithmName = getAlgorithmName(request);
+ AlgorithmProperties algorithmProperties = Algorithms.getInstance().getAlgorithmProperties(algorithmName);
+ if (algorithmProperties == null)
+ throw new RequestException(algorithmName, "The algorithm '" + algorithmName + "' does not exist.");
+
+ String algorithmKey = algorithmName + "_" + System.currentTimeMillis();
+
+ // Logging the algorithm execution parameters
+ log.info("Executing algorithm: " + algorithmName + " with key: " + algorithmKey);
+ HashMap<String, String> algorithmParameters = HBPQueryHelper.getAlgorithmParameters(request);
+ if (algorithmParameters != null) {
+ for (Map.Entry<String, String> parameter : algorithmParameters.entrySet())
+ log.info("Parameter: " + parameter.getKey() + ", with value: " + parameter.getValue());
+ }
+
+ ContainerProxy[] algorithmContainers = HBPQueryHelper.getAlgorithmNodes(algorithmParameters);
+
+ AdpDBClientQueryStatus queryStatus;
+
+ algorithmProperties.mergeWithAlgorithmParameters(algorithmParameters);
+
+ DataSerialization ds = DataSerialization.summary;
+
+ // Bypass direct composer call in case of iterative algorithm.
+ if (algorithmProperties.getType().equals(AlgorithmProperties.AlgorithmType.iterative) ||
+ algorithmProperties.getType().equals(AlgorithmProperties.AlgorithmType.python_iterative)) {
+
+ final IterativeAlgorithmState iterativeAlgorithmState =
+ iterationsHandler.handleNewIterativeAlgorithmRequest(
+ manager, algorithmKey, algorithmProperties, algorithmContainers);
+
+ log.info("Iterative algorithm " + algorithmKey + " execution started.");
+
+ BasicHttpEntity entity = new NIterativeAlgorithmResultEntity(
+ iterativeAlgorithmState, ds, ExaremeGatewayUtils.RESPONSE_BUFFER_SIZE);
+
+ response.setStatusCode(HttpStatus.SC_OK);
+ response.setEntity(entity);
+ } else {
+ String dfl = Composer.composeDFLScript(algorithmKey, algorithmProperties, algorithmContainers.length);
+ try {
+ Composer.persistDFLScriptToAlgorithmsDemoDirectory(
+ HBPConstants.DEMO_ALGORITHMS_WORKING_DIRECTORY + "/" + algorithmKey
+ + "/" + algorithmKey,
+ dfl, null);
+ } catch (IOException e) {
+ // Ignoring error if failed to persist DFL Scripts - it's not something fatal.
+ log.error(e);
+ }
+
+ AdpDBClientProperties clientProperties =
+ new AdpDBClientProperties(
+ HBPConstants.DEMO_DB_WORKING_DIRECTORY + algorithmKey,
+ "", "", false, false,
+ -1, 10);
+ clientProperties.setContainerProxies(algorithmContainers);
+ AdpDBClient dbClient =
+ AdpDBClientFactory.createDBClient(manager, clientProperties);
+ queryStatus = dbClient.query(algorithmKey, dfl);
+
+ log.info("Algorithm " + algorithmKey + " with queryID "
+ + queryStatus.getQueryID().getQueryID() + " execution started.");
+ log.debug("DFL Script: \n " + dfl);
+
+ BasicHttpEntity entity = new NQueryResultEntity(queryStatus, ds,
+ ExaremeGatewayUtils.RESPONSE_BUFFER_SIZE);
+ response.setStatusCode(HttpStatus.SC_OK);
+ response.setEntity(entity);
+ }
+ } catch (BadUserInputException e) {
+ log.error(e.getMessage());
+ String errorType = HBPQueryHelper.ErrorResponse.ErrorResponseTypes.user_error;
+ response.setStatusCode(HttpStatus.SC_OK);
+ response.setEntity(createErrorResponseEntity(e.getMessage(), errorType));
+
+ } catch (RequestException e) {
+ log.error(e.getMessage());
+ String errorType = HBPQueryHelper.ErrorResponse.ErrorResponseTypes.user_error;
+ response.setStatusCode(HttpStatus.SC_BAD_REQUEST);
+ response.setEntity(createErrorResponseEntity(e.getMessage(), errorType));
+
+ } catch (IterationsFatalException e) {
+ log.error(e);
+ if (e.getErroneousAlgorithmKey() != null)
+ iterationsHandler.removeIterativeAlgorithmStateInstanceFromISM(
+ e.getErroneousAlgorithmKey());
+ String errorType = HBPQueryHelper.ErrorResponse.ErrorResponseTypes.error;
+ response.setStatusCode(HttpStatus.SC_INTERNAL_SERVER_ERROR);
+ response.setEntity(createErrorResponseEntity(e.getMessage(), errorType));
+
+ } catch (Exception e) {
+ log.error(e.getMessage(), e);
+ String errorType = HBPQueryHelper.ErrorResponse.ErrorResponseTypes.error;
+ response.setStatusCode(HttpStatus.SC_INTERNAL_SERVER_ERROR);
+ response.setEntity(createErrorResponseEntity(serverErrorOccurred, errorType));
+ }
+ }
+
+ /// ----- Helper functions ----- ///
+
+ /**
+ * Checks that the request is a POST request.
+ *
+ * @param request the request of the algorithm
+ * @throws UnsupportedHttpVersionException if != POST
+ */
+ private void preExecutionChecks(HttpRequest request) throws UnsupportedHttpVersionException {
+ log.debug("Validate method ...");
+ RequestLine requestLine = request.getRequestLine();
+ String method = requestLine.getMethod().toUpperCase(Locale.ENGLISH);
+
+ if (!"POST".equals(method)) {
+ throw new UnsupportedHttpVersionException(method + " not supported.");
+ }
+ }
+
+ private String getAlgorithmName(HttpRequest request) {
+ RequestLine requestLine = request.getRequestLine();
+ String uri = requestLine.getUri();
+ return uri.substring(uri.lastIndexOf('/') + 1);
+ }
+
+ private BasicHttpEntity createErrorResponseEntity(String data, String type) {
+ BasicHttpEntity entity = new BasicHttpEntity();
+ String result = HBPQueryHelper.ErrorResponse.createErrorResponse(data, type);
+ entity.setContent(new ByteArrayInputStream(result.getBytes()));
+ return entity;
+ }
+}
+
+
+
diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/HBPQueryHelper.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/HBPQueryHelper.java
new file mode 100644
index 0000000000000000000000000000000000000000..360157cfb3f11541152bc12b8ea0cabe0fbf88b5
--- /dev/null
+++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HBP/HBPQueryHelper.java
@@ -0,0 +1,302 @@
+package madgik.exareme.master.gateway.async.handler.HBP;
+
+import com.google.gson.Gson;
+import madgik.exareme.master.gateway.async.handler.HBP.Exceptions.ConsulException;
+import madgik.exareme.master.gateway.async.handler.HBP.Exceptions.BadUserInputException;
+import madgik.exareme.worker.art.container.ContainerProxy;
+import madgik.exareme.worker.art.registry.ArtRegistryLocator;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpEntityEnclosingRequest;
+import org.apache.http.HttpRequest;
+import org.apache.http.util.EntityUtils;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.rmi.RemoteException;
+import java.util.*;
+
+import static madgik.exareme.master.gateway.async.handler.HBP.HBPQueryConstants.*;
+
+public class HBPQueryHelper {
+
+ private static final Logger log = Logger.getLogger(HBPQueryHelper.class);
+
+ public static class ErrorResponse {
+ public static class ErrorResponseTypes {
+ // The error type can be error, user_error or warning, depending on the kind of error that occurred.
+ public static final String error = "text/plain+error";
+ public static final String user_error = "text/plain+user_error";
+ public static final String warning = "text/plain+warning";
+ }
+
+ // Wraps an error message in the result format expected by the clients,
+ // e.g. {"result" : [{"data":"<message>","type":"text/plain+user_error"}]}
+ public static String createErrorResponse(String data, String type) {
+ return "{\"result\" : [{\"data\":" + "\"" + data + "\",\"type\":" + "\"" + type + "\"}]}";
+ }
+ }
+
+ public static HashMap<String, String> getAlgorithmParameters(HttpRequest request) throws IOException {
+ log.debug("Parsing content ...");
+ HashMap<String, String> inputContent = new HashMap<>();
+ List<Map> parameters = new ArrayList();
+ String content;
+
+ if (request instanceof HttpEntityEnclosingRequest) {
+ log.debug("Streaming ...");
+ HttpEntity entity = ((HttpEntityEnclosingRequest) request).getEntity();
+ content = EntityUtils.toString(entity);
+ if (content != null && !content.isEmpty()) {
+ parameters = new Gson().fromJson(content, List.class);
+ }
+ }
+ if (!parameters.isEmpty()) {
+ log.debug("All of the parameters: " + parameters);
+ for (Map k : parameters) {
+ String name = (String) k.get("name");
+ String value = (String) k.get("value");
+ if (name == null || name.isEmpty() || value == null || value.isEmpty()) continue;
+
+ log.debug("Parameter in the json: ");
+ log.debug(name + " = " + value);
+
+ value = value.replaceAll("[^A-Za-z0-9,._~*+><=&|(){}:\\-\\\"\\[\\]]", ""); // ><=&| are kept because they are needed in filter expressions
+ value = value.replaceAll("\\s+", "");
+
+ log.debug("Parameter after format: ");
+ log.debug(name + " = " + value);
+
+ inputContent.put(name, value);
+ }
+ return inputContent;
+ }
+ return null;
+ }
+
+ /**
+ * This function finds the proper containers on which the algorithm should run.
+ * It depends on the type of algorithm and on the combination of pathology/datasets:
+ * 1) HEALTH_CHECK, LIST_DATASETS algorithms don't have a pathology or datasets.
+ * 2) LIST_VARIABLES has only a pathology.
+ * 3) The "normal" algorithms have a pathology and datasets.
+ * <p>
+ * Validation also happens on the pathology/datasets combination.
+ * <p>
+ * The information on nodes is taken from CONSUL if a pathology or dataset is provided.
+ * <p>
+ * The nodes are then checked to see whether they are active.
+ *
+ * @param algorithmParameters are used to get the dataset/pathology
+ * @return the containers on which the algorithm should run
+ * @throws ConsulException if consul is unreachable
+ * @throws BadUserInputException if a dataset's node is inactive or doesn't exist in the pathology,
+ * or if the pathology is not available or not provided
+ * @throws RemoteException if the Exareme Registry is unreachable
+ */
+ public static ContainerProxy[] getAlgorithmNodes(HashMap<String, String> algorithmParameters)
+ throws ConsulException, BadUserInputException, RemoteException {
+ ConsulNodesPathologiesAndDatasetsInfo consulNodesPathologiesAndDatasetsInfo =
+ new ConsulNodesPathologiesAndDatasetsInfo();
+
+ if (algorithmParameters == null) { // HEALTH_CHECK and LIST_DATASETS algorithms.
+ // Get containers and log them
+ ContainerProxy[] containers = getAllActiveExaremeContainers();
+ logContainerNodes(containers, consulNodesPathologiesAndDatasetsInfo);
+ return containers;
+ }
+
+ String pathology = algorithmParameters.get("pathology");
+
+ // Get datasets in ArrayList
+ String datasetsInString = algorithmParameters.get("dataset");
+ ArrayList<String> datasets = null;
+ if (datasetsInString != null) {
+ datasets = new ArrayList<>(Arrays.asList(datasetsInString.split(",")));
+ }
+ log.info("Algorithm pathology: " + pathology + " and datasets: " + datasets + ".");
+
+ validatePathologyAndDatasets(
+ pathology,
+ datasets,
+ consulNodesPathologiesAndDatasetsInfo
+ );
+
+ if (datasets != null) { // ALL actual algorithms are in this case
+ // Get containers and log them
+ ContainerProxy[] containers = getAlgorithmNodes(pathology, datasets, consulNodesPathologiesAndDatasetsInfo);
+ logContainerNodes(containers, consulNodesPathologiesAndDatasetsInfo);
+ return containers;
+
+ } else if (pathology != null) {
+ // LIST_VARIABLES algorithm. It will only run on the master node.
+ // Fetching the master node's container.
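+ // The master node's IP is resolved from the CONSUL_MASTER_PATH key and cached
+ // by getMasterNodeIP(), so only the first call contacts Consul.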
+ ArrayList<String> algorithmNodes = new ArrayList<>(); + algorithmNodes.add(consulNodesPathologiesAndDatasetsInfo.getMasterNodeIP()); + + // Get containers and log them + ContainerProxy[] containers = getContainersFromExaremeRegistry(algorithmNodes); + logContainerNodes(containers, consulNodesPathologiesAndDatasetsInfo); + return containers; + + } else { + // If an algorithm parameter exists, a pathology should be provided. + throw new BadUserInputException(pathologyNotProvided); + } + } + + /** + * This function finds the proper containers on which the algorithm should run, + * only for the case of "normal" algorithms with pathology and datasets. + * + * @param pathology of the algorithm + * @param datasets of the algorithm + * @param consulNodesPathologiesAndDatasetsInfo are the consul information needed + * @return the containers to run the algorithm + * @throws BadUserInputException if dataset's node is inactive or doesn't exist in the pathology + * @throws RemoteException if the Exareme Registry is unreachable + */ + private static ContainerProxy[] getAlgorithmNodes( + String pathology, + ArrayList<String> datasets, + ConsulNodesPathologiesAndDatasetsInfo consulNodesPathologiesAndDatasetsInfo + ) throws RemoteException, BadUserInputException { + + HashMap<String, ArrayList<String>> algorithmNodeIPsAndDatasets = consulNodesPathologiesAndDatasetsInfo.getNodeDatasets(pathology, datasets); + ArrayList<String> algorithmNodes = new ArrayList<>(algorithmNodeIPsAndDatasets.keySet()); + ArrayList<String> inactiveNodes = getInactiveNodes(algorithmNodes); + + if (inactiveNodes.isEmpty()) { + return getContainersFromExaremeRegistry(algorithmNodes); + } + + log.info("Inactive Nodes: " + String.join(", ", inactiveNodes)); + + // Find the datasets that the user wanted but are inactive. + ArrayList<String> inactiveDatasets = new ArrayList<>(); + for (String inactiveNode : inactiveNodes) { + for (String inactiveDataset : algorithmNodeIPsAndDatasets.get(inactiveNode)) { + if (datasets.contains(inactiveDataset)) { + inactiveDatasets.add(inactiveDataset); + } + } + } + throw new BadUserInputException( + String.format( + datasetsXYZAreInactive, + String.join(", ", inactiveDatasets) + ) + ); + } + + private static void validatePathologyAndDatasets( + String pathology, + ArrayList<String> datasets, + ConsulNodesPathologiesAndDatasetsInfo nodesInformation + ) throws BadUserInputException { + + if (pathology != null) { + log.debug("Available pathologies: " + nodesInformation.getAllAvailablePathologies()); + if (!nodesInformation.getAllAvailablePathologies().contains(pathology)) { + throw new BadUserInputException(String.format(pathologyXNotAvailable, pathology)); + } + + if (datasets != null) { + ArrayList<String> datasetsOfPathology = nodesInformation.getDatasetsOfPathology(pathology); + for (String dataset : datasets) { + if (!datasetsOfPathology.contains(dataset)) { + throw new BadUserInputException(String.format(datasetXDoesNotExistInPathologyY, dataset, pathology)); + } + } + } + } else { + if (datasets != null) { + throw new BadUserInputException(pathologyNotProvided); + } + } + } + + /** + * Get nodes that are inactive from the list provided. + * An inactive node is a node that doesn't exist in the Exareme registry. 
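+ * In practice this means it is known to Consul but has no live container in the
+ * Exareme ART registry (e.g. it was removed by removeInactiveExaremeContainers
+ * after failing a reachability check).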
+ *
+ * @param algorithmNodes to check
+ * @return nodes that are inactive
+ */
+ private static ArrayList<String> getInactiveNodes(ArrayList<String> algorithmNodes) throws RemoteException {
+ ArrayList<String> nodes = new ArrayList<>(algorithmNodes);
+ ContainerProxy[] allActiveExaremeContainers = getAllActiveExaremeContainers();
+ for (ContainerProxy container : allActiveExaremeContainers) {
+ nodes.remove(container.getEntityName().getIP());
+ }
+ return nodes;
+ }
+
+ /**
+ * Fetches all the active containers in the Exareme RMI registry.
+ *
+ * @return all the containers that are still active
+ * @throws RemoteException if the Exareme Registry is unreachable
+ */
+ private static ContainerProxy[] getAllActiveExaremeContainers() throws RemoteException {
+ removeInactiveExaremeContainers();
+ return ArtRegistryLocator.getArtRegistryProxy().getContainers();
+ }
+
+ /**
+ * Removes any container that is unreachable from the Exareme RMI registry.
+ *
+ * @throws RemoteException if the Exareme Registry is unreachable
+ */
+ private static void removeInactiveExaremeContainers() throws RemoteException {
+ ContainerProxy[] containers = ArtRegistryLocator.getArtRegistryProxy().getContainers();
+ for (ContainerProxy container : containers) {
+ if (nodeUnreachable(container.getEntityName().getIP())) {
+ log.info("Removing unreachable node with IP: " + container.getEntityName().getIP());
+ ArtRegistryLocator.getArtRegistryProxy().removeContainer(container.getEntityName());
+ }
+ }
+ }
+
+ private static ContainerProxy[] getContainersFromExaremeRegistry(ArrayList<String> nodes) throws RemoteException {
+ List<ContainerProxy> containers = new LinkedList<>();
+ ContainerProxy[] allActiveExaremeContainers = getAllActiveExaremeContainers();
+ for (ContainerProxy container : allActiveExaremeContainers) {
+ if (nodes.contains(container.getEntityName().getIP())) {
+ containers.add(container);
+ nodes.remove(container.getEntityName().getIP());
+ }
+ }
+
+ if (nodes.size() > 0) {
+ log.info("The following nodes are not active: " + String.join(", ", nodes));
+ throw new RemoteException(nodesUnavailable);
+ }
+
+ return containers.toArray(new ContainerProxy[0]);
+ }
+
+ private static boolean nodeUnreachable(String IP) {
+ try {
+ InetAddress checkIP = InetAddress.getByName(IP);
+ if (checkIP.isReachable(5000)) {
+ log.debug("Node with IP: " + IP + " is reachable.");
+ return false;
+ } else {
+ log.debug("Node with IP: " + IP + " is unreachable.");
+ return true;
+ }
+ } catch (Exception e) {
+ log.debug("Node with IP: " + IP + " is not reachable. 
Exception: " + e.getMessage()); + return true; + } + } + + private static void logContainerNodes(ContainerProxy[] containers, + ConsulNodesPathologiesAndDatasetsInfo consulNodesPathologiesAndDatasetsInfo) { + log.info("Algorithm Nodes: "); + for (ContainerProxy algorithmContainer : containers) { + String nodeIP = algorithmContainer.getEntityName().getIP(); + String nodeName = consulNodesPathologiesAndDatasetsInfo.getNodeName(nodeIP); + log.info(" IP: " + nodeIP + " , NAME: " + nodeName); + } + } +} \ No newline at end of file diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HttpAsyncFileHandler.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HttpAsyncFileHandler.java index 8251415c59a1028bc4a4fef117460e84959cab39..182198318728b75edf45a2829b796df113ffd953 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HttpAsyncFileHandler.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HttpAsyncFileHandler.java @@ -63,7 +63,6 @@ public class HttpAsyncFileHandler implements HttpAsyncRequestHandler<HttpRequest String target = request.getRequestLine().getUri(); final File file = new File(this.docRoot, URLDecoder.decode(target, "UTF-8")); if (!file.exists()) { - response.setStatusCode(HttpStatus.SC_NOT_FOUND); NStringEntity entity = new NStringEntity( "<html><body><h1>File" + @@ -71,7 +70,6 @@ public class HttpAsyncFileHandler implements HttpAsyncRequestHandler<HttpRequest ContentType.create("text/html", "UTF-8")); response.setEntity(entity); System.out.println("File " + file.getPath() + " not found"); - } else if (!file.canRead() || file.isDirectory() || !file.getCanonicalPath().startsWith(this.docRoot.getCanonicalPath())) { response.setStatusCode(HttpStatus.SC_FORBIDDEN); @@ -79,7 +77,7 @@ public class HttpAsyncFileHandler implements HttpAsyncRequestHandler<HttpRequest "<html><body><h1>Access denied</h1></body></html>", ContentType.create("text/html", "UTF-8")); response.setEntity(entity); - System.out.println("Cannot read file " + file.getPath()); + log.debug("Cannot read file " + file.getPath()); } else if (file.getPath().endsWith(".ser")) { HttpCoreContext coreContext = HttpCoreContext.adapt(context); diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HttpAsyncMiningAlgorithmsHandler.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HttpAsyncMiningAlgorithmsHandler.java index 54d9ec52a89d60430ac823a97b6f15b9706a4f7b..492784e0cb2d908661190f5b0ffdbed8f2792bc2 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HttpAsyncMiningAlgorithmsHandler.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HttpAsyncMiningAlgorithmsHandler.java @@ -2,9 +2,9 @@ package madgik.exareme.master.gateway.async.handler; import com.google.gson.Gson; import com.google.gson.GsonBuilder; -import madgik.exareme.master.queryProcessor.composer.AlgorithmProperties; -import madgik.exareme.master.queryProcessor.composer.Algorithms; -import madgik.exareme.master.queryProcessor.composer.Exceptions.AlgorithmException; +import madgik.exareme.master.queryProcessor.HBP.AlgorithmProperties; +import madgik.exareme.master.queryProcessor.HBP.Algorithms; +import 
+import madgik.exareme.master.queryProcessor.HBP.Exceptions.AlgorithmException;
 import org.apache.http.*;
 import org.apache.http.entity.BasicHttpEntity;
 import org.apache.http.entity.ContentType;
diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HttpAsyncMiningQueryHandler.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HttpAsyncMiningQueryHandler.java
deleted file mode 100644
index 0d348eeb9559b8cdedd5449c561192003efad205..0000000000000000000000000000000000000000
--- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HttpAsyncMiningQueryHandler.java
+++ /dev/null
@@ -1,328 +0,0 @@
-package madgik.exareme.master.gateway.async.handler;
-
-import com.google.gson.Gson;
-import com.google.gson.JsonSyntaxException;
-import madgik.exareme.common.consts.HBPConstants;
-import madgik.exareme.master.client.AdpDBClient;
-import madgik.exareme.master.client.AdpDBClientFactory;
-import madgik.exareme.master.client.AdpDBClientProperties;
-import madgik.exareme.master.client.AdpDBClientQueryStatus;
-import madgik.exareme.master.connector.DataSerialization;
-import madgik.exareme.master.engine.AdpDBManager;
-import madgik.exareme.master.engine.AdpDBManagerLocator;
-import madgik.exareme.master.engine.iterations.exceptions.IterationsFatalException;
-import madgik.exareme.master.engine.iterations.handler.IterationsHandler;
-import madgik.exareme.master.engine.iterations.handler.NIterativeAlgorithmResultEntity;
-import madgik.exareme.master.engine.iterations.state.IterativeAlgorithmState;
-import madgik.exareme.master.gateway.ExaremeGatewayUtils;
-import madgik.exareme.master.gateway.async.handler.Exceptions.DatasetsException;
-import madgik.exareme.master.gateway.async.handler.Exceptions.PathologyException;
-import madgik.exareme.master.gateway.async.handler.entity.NQueryResultEntity;
-import madgik.exareme.master.queryProcessor.composer.AlgorithmProperties;
-import madgik.exareme.master.queryProcessor.composer.Algorithms;
-import madgik.exareme.master.queryProcessor.composer.Composer;
-import madgik.exareme.master.queryProcessor.composer.Exceptions.AlgorithmException;
-import madgik.exareme.worker.art.container.Container;
-import madgik.exareme.worker.art.container.ContainerProxy;
-import madgik.exareme.worker.art.registry.ArtRegistryLocator;
-import org.apache.http.*;
-import org.apache.http.client.methods.CloseableHttpResponse;
-import org.apache.http.client.methods.HttpDelete;
-import org.apache.http.entity.BasicHttpEntity;
-import org.apache.http.entity.ContentType;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.HttpClients;
-import org.apache.http.nio.protocol.*;
-import org.apache.http.protocol.HttpContext;
-import org.apache.http.util.EntityUtils;
-import org.apache.log4j.Logger;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.net.InetAddress;
-import java.rmi.ServerException;
-import java.util.*;
-
-import static madgik.exareme.master.gateway.GatewayConstants.COOKIE_ALGORITHM_EXECUTION_ID;
-
-public class HttpAsyncMiningQueryHandler implements HttpAsyncRequestHandler<HttpRequest> {
-
-    private static final Logger log = Logger.getLogger(HttpAsyncMiningQueryHandler.class);
-    private static final String SET_COOKIE_HEADER_NAME = "Set-Cookie";
-    private static final AdpDBManager manager = AdpDBManagerLocator.getDBManager();
-    private static final IterationsHandler iterationsHandler = IterationsHandler.getInstance();
-    private static final String error = new String("text/plain+error");
-    private static final String user_error = new String("text/plain+user_error");
-
-    public HttpAsyncMiningQueryHandler() {
-    }
-
-    @Override
-    public HttpAsyncRequestConsumer<HttpRequest> processRequest(HttpRequest request,
-                                                                HttpContext context) throws HttpException, IOException {
-
-        return new BasicAsyncRequestConsumer();
-    }
-
-    @Override
-    public void handle(HttpRequest request, HttpAsyncExchange httpExchange, HttpContext context)
-            throws HttpException, IOException {
-
-        HttpResponse response = httpExchange.getResponse();
-        response.setHeader("Content-Type", String.valueOf(ContentType.APPLICATION_JSON));
-
-        // When under testing the Set-Cookie header has been used with the "algorithm execution id"
-        // parameter for differentiating between concurrent executions of algorithms.
-        if (request.containsHeader(SET_COOKIE_HEADER_NAME)) {
-            HeaderIterator it = request.headerIterator(SET_COOKIE_HEADER_NAME);
-
-            // Parse "algorithm execution id" cookie
-            StringBuilder echoCookieContent = new StringBuilder();
-            while (it.hasNext()) {
-                echoCookieContent.append(it.next());
-            }
-
-            String cookieContentStr = echoCookieContent.toString();
-            if (!cookieContentStr.isEmpty() &&
-                    cookieContentStr.contains(COOKIE_ALGORITHM_EXECUTION_ID)) {
-
-                String algorithmExecIdStr =
-                        cookieContentStr.substring(
-                                cookieContentStr.indexOf(" "),
-                                cookieContentStr.length())
-                                .split("=")[1];
-
-                response.addHeader(
-                        SET_COOKIE_HEADER_NAME,
-                        COOKIE_ALGORITHM_EXECUTION_ID + "=" + algorithmExecIdStr);
-            }
-        }
-        try {
-            handleInternal(request, response, context);
-        } catch (Exception e) {
-            log.error(e.getMessage());
-            String data = e.getMessage();
-            String type = user_error; //type could be error, user_error, warning regarding the error occurred along the process
-            String result = HttpAsyncMiningQueryHelper.defaultOutputFormat(data, type);
-            BasicHttpEntity entity = new BasicHttpEntity();
-            entity.setContent(new ByteArrayInputStream(result.getBytes()));
-            response.setStatusCode(HttpStatus.SC_BAD_REQUEST);
-            response.setEntity(entity);
-        }
-        httpExchange.submitResponse(new BasicAsyncResponseProducer(response));
-    }
-
-    private String preALgoExecutionChecks(HttpRequest request) throws UnsupportedHttpVersionException {
-        log.debug("Validate method ...");
-        RequestLine requestLine = request.getRequestLine();
-        String uri = requestLine.getUri();
-        String algorithmName = uri.substring(uri.lastIndexOf('/') + 1);
-        String method = requestLine.getMethod().toUpperCase(Locale.ENGLISH);
-
-        if (!"POST".equals(method)) {
-            throw new UnsupportedHttpVersionException(method + "not supported.");
-        }
-        log.debug("Posting " + algorithmName + " ...\n");
-        return algorithmName;
-    }
-
-    private void handleInternal(HttpRequest request, HttpResponse response, HttpContext context)
-            throws Exception {
-
-        List<ContainerProxy> nodesToBeChecked;
-        int numberOfContainers;
-
-        //Check given method
-        String algorithmName = preALgoExecutionChecks(request);
-
-        //Get parameters of given algorithm
-        HashMap<String, String> inputContent = HttpAsyncMiningQueryHelper.getAlgoParameters(request);
-
-        try {
-            nodesToBeChecked=HttpAsyncMiningQueryHelper.getInputAlgo(inputContent);
-
-            if (nodesToBeChecked == null)
-                throw new Exception("No nodes found to run experiment");
-            for(ContainerProxy node : nodesToBeChecked) {
-                if(inputContent!=null && inputContent.containsKey("pathology"))
-                    pingContainer(node, inputContent.get("pathology"));
-                else
-                    pingContainer(node, null);
-            }
-
-
-
-            ContainerProxy[] usedContainerProxies = nodesToBeChecked.toArray(new ContainerProxy[nodesToBeChecked.size()]);
-            numberOfContainers = nodesToBeChecked.size();
-            log.debug("Containers: " + numberOfContainers);
-            String algorithmKey = algorithmName + "_" + System.currentTimeMillis();
-            String dfl;
-            AdpDBClientQueryStatus queryStatus;
-
-            AlgorithmProperties algorithmProperties = Algorithms.getInstance().getAlgorithmProperties(algorithmName);
-
-            if (algorithmProperties == null)
-                throw new AlgorithmException(algorithmName,"The algorithm '" + algorithmName + "' does not exist.");
-
-            algorithmProperties.mergeAlgorithmParametersWithInputContent(inputContent);
-
-            DataSerialization ds = DataSerialization.summary;
-
-            // Bypass direct composer call in case of iterative algorithm.
-            if (algorithmProperties.getType().equals(AlgorithmProperties.AlgorithmType.iterative) ||
-                    algorithmProperties.getType().equals(AlgorithmProperties.AlgorithmType.python_iterative)) {
-
-                final IterativeAlgorithmState iterativeAlgorithmState =
-                        iterationsHandler.handleNewIterativeAlgorithmRequest(
-                                manager, algorithmKey, algorithmProperties, usedContainerProxies);
-
-                log.info("Iterative algorithm " + algorithmKey + " execution started.");
-
-                BasicHttpEntity entity = new NIterativeAlgorithmResultEntity(
-                        iterativeAlgorithmState, ds, ExaremeGatewayUtils.RESPONSE_BUFFER_SIZE);
-
-                response.setStatusCode(HttpStatus.SC_OK);
-                response.setEntity(entity);
-            } else {
-                dfl = Composer.composeDFLScript(algorithmKey, algorithmProperties, numberOfContainers);
-                try {
-                    Composer.persistDFLScriptToAlgorithmsDemoDirectory(
-                            HBPConstants.DEMO_ALGORITHMS_WORKING_DIRECTORY + "/" + algorithmKey
-                                    + "/" + algorithmKey,
-                            dfl, null);
-                } catch (IOException e) {
-                    // Ignoring error if failed to persist DFL Scripts - it's not something fatal.
-                    log.error(e);
-                }
-
-                AdpDBClientProperties clientProperties =
-                        new AdpDBClientProperties(
-                                HBPConstants.DEMO_DB_WORKING_DIRECTORY + algorithmKey,
-                                "", "", false, false,
-                                -1, 10);
-                clientProperties.setContainerProxies(usedContainerProxies);
-                AdpDBClient dbClient =
-                        AdpDBClientFactory.createDBClient(manager, clientProperties);
-                queryStatus = dbClient.query(algorithmKey, dfl);
-
-                log.info("Algorithm " + algorithmKey + " with queryID "
DFL Script: \n " + dfl); - - BasicHttpEntity entity = new NQueryResultEntity(queryStatus, ds, - ExaremeGatewayUtils.RESPONSE_BUFFER_SIZE); - response.setStatusCode(HttpStatus.SC_OK); - response.setEntity(entity); - } - } catch (IterationsFatalException e) { - log.error(e); - if (e.getErroneousAlgorithmKey() != null) - iterationsHandler.removeIterativeAlgorithmStateInstanceFromISM( - e.getErroneousAlgorithmKey()); - log.error(e); - BasicHttpEntity entity = new BasicHttpEntity(); - String data = e.getMessage(); - String type = error; //type could be error, user_error, warning regarding the error occured along the process - String result = HttpAsyncMiningQueryHelper.defaultOutputFormat(data, type); - entity.setContent(new ByteArrayInputStream(result.getBytes())); - response.setStatusCode(HttpStatus.SC_BAD_REQUEST); - response.setEntity(entity); - } catch (JsonSyntaxException e) { - log.error("Could not parse the algorithms properly."); - String data = "Could not parse the algorithms properly."; - String type = error; //type could be error, user_error, warning regarding the error occured along the process - String result = HttpAsyncMiningQueryHelper.defaultOutputFormat(data, type); - BasicHttpEntity entity = new BasicHttpEntity(); - entity.setContent(new ByteArrayInputStream(result.getBytes())); - response.setStatusCode(HttpStatus.SC_BAD_REQUEST); - response.setEntity(entity); - } catch (PathologyException | DatasetsException | IOException e) { - log.error(e.getMessage()); - String data = e.getMessage(); - String type = user_error; //type could be error, user_error, warning regarding the error occured along the process - String result = HttpAsyncMiningQueryHelper.defaultOutputFormat(data, type); - BasicHttpEntity entity = new BasicHttpEntity(); - entity.setContent(new ByteArrayInputStream(result.getBytes())); - response.setStatusCode(HttpStatus.SC_BAD_REQUEST); - response.setEntity(entity); - } - catch (Exception e) { - log.error(e); - String data = e.getMessage(); - String type = error; //type could be error, user_error, warning regarding the error occured along the process - String result = HttpAsyncMiningQueryHelper.defaultOutputFormat(data, type); - BasicHttpEntity entity = new BasicHttpEntity(); - entity.setContent(new ByteArrayInputStream(result.getBytes())); - response.setStatusCode(HttpStatus.SC_BAD_REQUEST); - response.setEntity(entity); - } - } - - - private void pingContainer(ContainerProxy container, String pathology) throws Exception { - InetAddress checkIP = InetAddress.getByName(container.getEntityName().getIP()); - Gson gson = new Gson(); - String availableDatasets; - log.debug("Checking worker with IP "+container.getEntityName().getIP()); - if (checkIP.isReachable(5000)) { - log.debug("Host "+ container.getEntityName().getIP()+"is reachable"); - } - else { - ArtRegistryLocator.getArtRegistryProxy().removeContainer(container.getEntityName()); - - HashMap<String, String> names = HttpAsyncMiningQueryHelper.getNamesOfActiveNodesInConsul(); - String name = names.get(container.getEntityName().getIP()); - //Delete pathologies and IP of the node - String pathologyKey = HttpAsyncMiningQueryHelper.searchConsul(System.getenv("DATA") + "/" + name + "?keys"); - String[] pathologyKeyArray = gson.fromJson(pathologyKey, String[].class); - for (String p : pathologyKeyArray) { - deleteFromConsul(p); //Delete every pathology for node with name $name - } - //Delete IP of active_worker with name $name - deleteFromConsul(System.getenv("EXAREME_ACTIVE_WORKERS_PATH") + "/" + name); - - if 
-            if (pathology==null){
-                throw new Exception("Re run your experiment using available data");
-            }
-
-            availableDatasets = HttpAsyncMiningQueryHelper.getAvailableDatasetsFromConsul(pathology);
-            if (availableDatasets!=null){
-                throw new Exception("Re run your experiment using available data: '"+availableDatasets+"'");
-            }
-            else{
-                throw new Exception("No data available to run any other experiments. Consult your system administration.");
-            }
-        }
-    }
-
-    //Some times infos regarding Exareme nodes exist in Consul-Key-Value store], but the nodes are not part of Exareme's registry. We delete the infos from Consul[Key-Value store]
-    private void deleteFromConsul(String query) throws IOException {
-        CloseableHttpClient httpclient = HttpClients.createDefault();
-        String consulURL = System.getenv("CONSULURL");
-        if (consulURL == null) throw new IOException("Consul url not set");
-        if (!consulURL.startsWith("http://")) {
-            consulURL = "http://" + consulURL;
-        }
-        HttpDelete httpDelete;
-        httpDelete = new HttpDelete(consulURL + "/v1/kv/" + query);
-
-        //curl -X DELETE $CONSULURL/v1/kv/$DATASETS/$NODE_NAME
-        //curl -X DELETE $CONSULURL/v1/kv/$1/$NODE_NAME
-
-        log.debug("Running: " + httpDelete.getURI());
-
-        CloseableHttpResponse response = null;
-        if (httpDelete.toString().contains(System.getenv("EXAREME_ACTIVE_WORKERS_PATH") + "/") || httpDelete.toString().contains(System.getenv("DATA") + "/")) { //if we can not contact : http://exareme-keystore:8500/v1/kv/master* or http://exareme-keystore:8500/v1/kv/datasets*
-            try { //then throw exception
-                response = httpclient.execute(httpDelete);
-                if (response.getStatusLine().getStatusCode() != 200) {
-                    throw new ServerException("Cannot contact consul", new Exception(EntityUtils.toString(response.getEntity())));
-                }
-            } finally {
-                response.close();
-            }
-        }
-    }
-
-}
-
-
-
diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HttpAsyncMiningQueryHelper.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HttpAsyncMiningQueryHelper.java
deleted file mode 100644
index 0078a3857e3c022f7b95b9f099b8b6ef0c94c451..0000000000000000000000000000000000000000
--- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/HttpAsyncMiningQueryHelper.java
+++ /dev/null
@@ -1,305 +0,0 @@
-package madgik.exareme.master.gateway.async.handler;
-
-import com.google.gson.Gson;
-import madgik.exareme.master.gateway.async.handler.Exceptions.DatasetsException;
-import madgik.exareme.master.gateway.async.handler.Exceptions.PathologyException;
-import madgik.exareme.worker.art.container.Container;
-import madgik.exareme.worker.art.container.ContainerProxy;
-import madgik.exareme.worker.art.registry.ArtRegistryLocator;
-import org.apache.http.HttpEntity;
-import org.apache.http.HttpEntityEnclosingRequest;
-import org.apache.http.HttpRequest;
-import org.apache.http.HttpStatus;
-import org.apache.http.client.methods.CloseableHttpResponse;
-import org.apache.http.client.methods.HttpGet;
-import org.apache.http.entity.BasicHttpEntity;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.HttpClients;
-import org.apache.http.util.EntityUtils;
-import org.apache.log4j.Logger;
-
-import javax.xml.crypto.Data;
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.rmi.RemoteException;
-import java.util.*;
-
-public class HttpAsyncMiningQueryHelper {
-
-    private static final Logger log = Logger.getLogger(HttpAsyncMiningQueryHandler.class);
-    private static final String user_error = new String("text/plain+user_error");
-
-    private static HashMap<String, String[]> getNodesForPathology(String pathology) throws IOException, PathologyException {
-        Gson gson = new Gson();
-        HashMap<String, String[]> nodeDatasets = new HashMap<>();
-        List<String> pathologyNodes = new ArrayList<String>();
-
-        String masterKey = searchConsul(System.getenv("EXAREME_MASTER_PATH") + "/?keys");
-        String[] masterKeysArray = gson.fromJson(masterKey, String[].class);
-
-        String masterName = masterKeysArray[0].replace(System.getenv("EXAREME_MASTER_PATH") + "/", "");
-        String masterIP = searchConsul(System.getenv("EXAREME_MASTER_PATH") + "/" + masterName + "?raw");
-
-        String pathologyKey = searchConsul(System.getenv("DATA") + "/" + masterName + "/" + pathology + "?keys");
-        String[] pathologyKeyKeysArray = gson.fromJson(pathologyKey, String[].class);
-
-        if (pathologyKeyKeysArray != null) {
-            pathologyNodes.add(pathologyKeyKeysArray[0]); //Add Master Pathology
-        }
-
-        String datasetKey = searchConsul(System.getenv("DATA") + "/" + masterName + "/" + pathology + "?raw");
-        String[] datasetKeysArray = gson.fromJson(datasetKey, String[].class);
-        if (datasetKeysArray != null)
-            nodeDatasets.put(masterIP, datasetKeysArray); //Map Master IP-> Matser Datasets
-
-        String workersKey = searchConsul(System.getenv("EXAREME_ACTIVE_WORKERS_PATH") + "/?keys");
-        if (workersKey == null) //No workers running
-            return nodeDatasets; //return master's Datasets only
-        String[] workerKeysArray = gson.fromJson(workersKey, String[].class);
-        for (String worker : workerKeysArray) {
-            String workerName = worker.replace(System.getenv("EXAREME_ACTIVE_WORKERS_PATH") + "/", "");
-            String workerIP = searchConsul(System.getenv("EXAREME_ACTIVE_WORKERS_PATH") + "/" + workerName + "?raw");
-
-
-            pathologyKey = searchConsul(System.getenv("DATA") + "/" + workerName + "/" + pathology + "?keys");
-            pathologyKeyKeysArray = gson.fromJson(pathologyKey, String[].class);
-
-            if (pathologyKeyKeysArray != null) {
-                pathologyNodes.add(pathologyKeyKeysArray[0]); //Add worker Pathology
-            }
-
-            datasetKey = searchConsul(System.getenv("DATA") + "/" + workerName + "/" + pathology + "?raw");
-            datasetKeysArray = gson.fromJson(datasetKey, String[].class);
-            if (datasetKeysArray != null)
-                nodeDatasets.put(workerIP, datasetKeysArray); //Map Worker's IP-> Worker's Datasets
-        }
-
-        if (pathologyNodes.isEmpty()) {
-            throw new PathologyException("Pathology " + pathology + " not found!");
-        }
-
-        return nodeDatasets;
-    }
-
-
-
-    static HashMap<String, String> getNamesOfActiveNodesInConsul() throws Exception {
-        Gson gson = new Gson();
-        HashMap<String, String> nodeNames = new HashMap<>();
-        String masterKey = searchConsul(System.getenv("EXAREME_MASTER_PATH") + "/?keys");
-        String[] masterKeysArray = gson.fromJson(masterKey, String[].class); //Map Master's IP-> Master's Name
-
-        String masterName = masterKeysArray[0].replace(System.getenv("EXAREME_MASTER_PATH") + "/", "");
-        String masterIP = searchConsul(System.getenv("EXAREME_MASTER_PATH") + "/" + masterName + "?raw");
-        nodeNames.put(masterIP, masterName);
-
-        String workersKey = searchConsul(System.getenv("EXAREME_ACTIVE_WORKERS_PATH") + "/?keys");
-        if (workersKey == null) //No workers running
-            return nodeNames; //return master only
-        String[] workerKeysArray = gson.fromJson(workersKey, String[].class);
-        for (String worker : workerKeysArray) {
-            String workerName = worker.replace(System.getenv("EXAREME_ACTIVE_WORKERS_PATH") + "/", "");
-            String workerIP = searchConsul(System.getenv("EXAREME_ACTIVE_WORKERS_PATH") + "/" + workerName + "?raw");
-            nodeNames.put(workerIP, workerName); //Map Worker's IP-> Worker's Name
-        }
-        return nodeNames;
-    }
-
-
-    static HashMap<String, String> getAlgoParameters(HttpRequest request) throws IOException {
-
-        log.debug("Parsing content ...");
-        HashMap<String, String> inputContent = new HashMap<>();
-        List<Map> parameters = new ArrayList();
-        String content;
-
-        if (request instanceof HttpEntityEnclosingRequest) {
-            log.debug("Streaming ...");
-            HttpEntity entity = ((HttpEntityEnclosingRequest) request).getEntity();
-            content = EntityUtils.toString(entity);
-            if (content != null && !content.isEmpty()) {
-                parameters = new Gson().fromJson(content, List.class);
-            }
-        }
-        if (!parameters.isEmpty()) {
-            log.debug("All of the parameters: " + parameters);
-            for (Map k : parameters) {
-                String name = (String) k.get("name");
-                String value = (String) k.get("value");
-                if (name == null || name.isEmpty() || value == null || value.isEmpty()) continue;
-
-                log.debug("Parameter in the json: ");
-                log.debug(name + " = " + value);
-
-                value = value.replaceAll("[^A-Za-z0-9,._~*+><=&|(){}:\\-\\\"\\[\\]]", ""); // ><=&| we no more need those for filtering
-                value = value.replaceAll("\\s+", "");
-
-                log.debug("Parameter after format: ");
-                log.debug(name + " = " + value);
-
-                inputContent.put(name, value);
-            }
-            return inputContent;
-        }
-        return null;
-    }
-
-
-    static String searchConsul(String query) throws IOException {
-        String result = null;
-        CloseableHttpClient httpclient = HttpClients.createDefault();
-        String consulURL = System.getenv("CONSULURL");
-        if (consulURL == null) throw new IOException("Consul url not set");
-        if (!consulURL.startsWith("http://")) {
-            consulURL = "http://" + consulURL;
-        }
-
-        HttpGet httpGet;
-        httpGet = new HttpGet(consulURL + "/v1/kv/" + query);
-        log.debug("Running: " + httpGet.getURI());
-        CloseableHttpResponse response = null;
-        if (httpGet.toString().contains(System.getenv("EXAREME_MASTER_PATH") + "/") || httpGet.toString().contains(System.getenv("DATA") + "/")) { //if we can not contact : http://exareme-keystore:8500/v1/kv/master* or http://exareme-keystore:8500/v1/kv/datasets*
-            try { //then throw exception
-                response = httpclient.execute(httpGet);
-            } catch (Exception e) {
-                response.close();
-            }
-            result = EntityUtils.toString(response.getEntity());
-        }
-        if (httpGet.toString().contains(System.getenv("EXAREME_ACTIVE_WORKERS_PATH") + "/")) { //if we can not contact : http://exareme-keystore:8500/v1/kv/active_workers*
-            //then maybe there are no workers running
-            try {
-                response = httpclient.execute(httpGet);
-                if (response.getStatusLine().getStatusCode() != 200) {
-                    if (httpGet.toString().contains("?keys"))
-                        log.debug("No workers running. Continue with master");
-                } else {
-                    result = EntityUtils.toString(response.getEntity());
-                }
-            }
-            catch (Exception e){
-                response.close();
-            }
-        }
-        return result;
-    }
-
-    public static List<ContainerProxy> getInputAlgo(HashMap<String, String> inputContent) throws IOException, DatasetsException, PathologyException {
-        List<ContainerProxy> nodesToBeChecked = new ArrayList<>();
-        String datasets;
-        String[] userDatasets = null;
-        String pathology = null;
-        HashMap<String, String[]> nodeDatasets = new HashMap<>();
-
-        if (inputContent == null ) { //list_datasets
-            nodesToBeChecked.addAll(Arrays.asList(ArtRegistryLocator.getArtRegistryProxy().getContainers()));
-            return nodesToBeChecked;
-        }
-        else if(inputContent.size()==1 && inputContent.containsKey("pathology")) { //list_variables
-            pathology = inputContent.get("pathology");
-            nodeDatasets = getNodesForPathology(pathology);
-
-            nodesToBeChecked.addAll(Arrays.asList(ArtRegistryLocator.getArtRegistryProxy().getContainers()));
-            return nodesToBeChecked;
-        }
-
-        else {
-            if (inputContent.containsKey("pathology")) {
-                pathology = inputContent.get("pathology");
-                nodeDatasets = getNodesForPathology(pathology);
-            }
-            else
-                throw new PathologyException("The parameter pathology should not be blank");
-            if (inputContent.containsKey("dataset")) {
-                datasets = inputContent.get("dataset");
-                //Get datasets provided by user
-                userDatasets = datasets.split(",");
-            }
-            else
-                throw new DatasetsException("The parameter dataset should not be blank");
-            if(nodeDatasets.isEmpty()){
-                throw new PathologyException("Existing nodes do not have data for pathology: "+pathology);
-            }
-            nodesToBeChecked = checkDatasets(nodeDatasets, userDatasets, pathology);
-        }
-        return nodesToBeChecked;
-    }
-
-
-    private static List<ContainerProxy> checkDatasets(HashMap<String, String[]> nodeDatasets, String[] userDatasets, String pathology) throws DatasetsException, RemoteException {
-        List<String> notFoundDatasets = new ArrayList<>();
-        List<String> nodesToBeChecked = new ArrayList<>();
-        List<ContainerProxy> containers = new ArrayList<>();
-
-        Boolean flag;
-
-        //for every dataset provided by the user
-        for (String data : userDatasets) {
-            Iterator<Map.Entry<String, String[]>> entries = nodeDatasets.entrySet().iterator();
-            flag = false;
-            //for each Exareme node (master/workers)
-            while (entries.hasNext()) {
-                Map.Entry<String, String[]> entry = entries.next();
-                String IP = entry.getKey();
-                String[] datasets = entry.getValue();
-                //if dataset exist in that Exareme node
-                if (Arrays.asList(datasets).contains(data)) {
-                    //and Exareme node not already added to list nodesToBeChecked
-                    if (!nodesToBeChecked.contains(IP)) {
-                        nodesToBeChecked.add(IP);
-                        for(ContainerProxy cp : ArtRegistryLocator.getArtRegistryProxy().getContainers()){
-                            if (cp.getEntityName().getIP().equals(IP)) {
-                                containers.add(cp);
-                                break;
-                            }
-                        }
-                    }
-                    flag = true;
-                    continue;
-                }
-            }
-            //if flag=false then dataset(s) provided by user are not contained in ANY Exareme node
-            if (!flag) {
-                notFoundDatasets.add(data);
-            }
-        }
-        //if notFoundDatasets list is not empty, there are dataset(s) provided by user not contained in ANY Exareme node
-        if (notFoundDatasets.size() != 0) {
-            StringBuilder notFound = new StringBuilder();
-            for (String ds : notFoundDatasets)
-                notFound.append(ds).append(", ");
-            String notFoundSring = notFound.toString();
-            notFoundSring = notFoundSring.substring(0, notFoundSring.length() - 2);
-            //Show appropriate error message to user
-            throw new DatasetsException("Dataset(s) " + notFoundSring + " not found for pathology " +pathology + "!");
-        }
-        return containers;
-
-    }
-
-
-    static String getAvailableDatasetsFromConsul(String pathology) throws Exception {
-        HashMap<String,String> names = getNamesOfActiveNodesInConsul();
-        StringBuilder datasets=new StringBuilder();
-        Gson gson = new Gson();
-
-        for (Map.Entry<String, String> entry : names.entrySet()) {
-            String dataRaw = searchConsul(System.getenv("DATA") + "/" + entry.getValue() + "/" + pathology + "?raw");
-            String[] data = gson.fromJson(dataRaw,String[].class);
-            for (String d : data){
-                if(!d.isEmpty())
-                    datasets.append(d).append(",");
-            }
-        }
-        if(!datasets.toString().isEmpty())
-            return datasets.substring(0, datasets.length() - 1);
-        else
-            return null;
-    }
-
-    static String defaultOutputFormat(String data, String type) {
-        return "{\"result\" : [{\"data\":" + "\"" + data + "\",\"type\":" + "\"" + type + "\"}]}";
-    }
-}
-
-
diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/entity/NQueryResultEntity.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/entity/NQueryResultEntity.java
index d455b0fbdfe893c7814dda7158a4d28b4acc5b67..a0dd23fde67cad66c62ba545b79f9180990c2aa6 100644
--- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/entity/NQueryResultEntity.java
+++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/entity/NQueryResultEntity.java
@@ -2,22 +2,20 @@ package madgik.exareme.master.gateway.async.handler.entity;
 
 import madgik.exareme.master.client.AdpDBClientQueryStatus;
 import madgik.exareme.master.connector.DataSerialization;
-import org.apache.commons.io.IOUtils;
+import madgik.exareme.master.gateway.async.handler.HBP.HBPQueryHelper;
 import org.apache.http.entity.BasicHttpEntity;
 import org.apache.http.nio.ContentEncoder;
 import org.apache.http.nio.IOControl;
 import org.apache.http.nio.entity.HttpAsyncContentProducer;
 import org.apache.log4j.Logger;
 
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.channels.Channels;
 import java.nio.channels.ReadableByteChannel;
 import java.nio.charset.StandardCharsets;
 
-/**
- * TODO flush output before suspend
- */
 public class NQueryResultEntity extends BasicHttpEntity implements HttpAsyncContentProducer {
 
     private static final Logger log = Logger.getLogger(NQueryResultEntity.class);
@@ -26,7 +24,7 @@ public class NQueryResultEntity extends BasicHttpEntity implements HttpAsyncCont
     private final ByteBuffer buffer;
     private ReadableByteChannel channel;
     private NQueryStatusEntity.QueryStatusListener l;
-    private DataSerialization format;
+    private final DataSerialization format;
     private final static String user_error = "text/plain+user_error";
     private final static String error = "text/plain+error";
     private final static String warning = "text/plain+warning";
@@ -41,23 +39,26 @@ public class NQueryResultEntity extends BasicHttpEntity implements HttpAsyncCont
         format = ds;
     }
 
-    @Override
-    public void produceContent(ContentEncoder encoder, IOControl ioctrl)
+    public void produceContent(ContentEncoder encoder, IOControl iocontrol)
             throws IOException {
         if (!queryStatus.hasFinished() && !queryStatus.hasError()) {
             if (l == null) {
-                l = new NQueryStatusEntity.QueryStatusListener(ioctrl);
+                l = new NQueryStatusEntity.QueryStatusListener(iocontrol);
                 queryStatus.registerListener(l);
             }
-            ioctrl.suspendOutput();
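+            // Suspend the response stream while the query is still running; the
+            // registered QueryStatusListener resumes it via IOControl.requestOutput()
+            // when the query status changes.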
+            iocontrol.suspendOutput();
             return;
         }
 
         if (!queryStatus.hasError()) {
             if (channel == null) {
-                channel = Channels.newChannel(queryStatus.getResult(format));
+                String result = queryStatus.getResult(format);
+                log.info("Algorithm with queryId " + queryStatus.getQueryID().getQueryID()
+                        + " terminated. Result: \n " + result);
+                channel = Channels.newChannel(
+                        new ByteArrayInputStream(result.getBytes(StandardCharsets.UTF_8)));
             }
             channel.read(buffer);
             buffer.flip();
@@ -66,75 +67,56 @@ public class NQueryResultEntity extends BasicHttpEntity implements HttpAsyncCont
             this.buffer.compact();
             if (i < 1 && !buffering) {
                 encoder.complete();
+                closeQuery();
                 close();
             }
-
         } else {
-            log.trace("|" + queryStatus.getError() + "|");
             if (queryStatus.getError().contains("ExaremeError:")) {
                 String data = queryStatus.getError().substring(queryStatus.getError().lastIndexOf("ExaremeError:") + "ExaremeError:".length()).replaceAll("\\s", " ");
-                //type could be error, user_error, warning regarding the error occurred along the process
-                String type = user_error;
-                String result = defaultOutputFormat(data, type);
+                String result = HBPQueryHelper.ErrorResponse.createErrorResponse(data, user_error);
                 logErrorMessage(result);
                 encoder.write(ByteBuffer.wrap(result.getBytes()));
                 encoder.complete();
-                close();
             } else if (queryStatus.getError().contains("PrivacyError")) {
                 String data = "The Experiment could not run with the input provided because there are insufficient data.";
-                //type could be error, user_error, warning regarding the error occurred along the process
-                String type = warning;
-                String result = defaultOutputFormat(data, type);
+                String result = HBPQueryHelper.ErrorResponse.createErrorResponse(data, warning);
                 logErrorMessage(result);
                 encoder.write(ByteBuffer.wrap(result.getBytes()));
                 encoder.complete();
-                close();
             } else if (queryStatus.getError().contains("java.rmi.RemoteException")) {
                 String data = "One or more containers are not responding. Please inform the system administrator.";
-                //type could be error, user_error, warning regarding the error occurred along the process
-                String type = error;
-                String result = defaultOutputFormat(data, type);
+                String result = HBPQueryHelper.ErrorResponse.createErrorResponse(data, error);
                 logErrorMessage(result);
                 encoder.write(ByteBuffer.wrap(result.getBytes()));
                 encoder.complete();
-                close();
-            } else if (queryStatus.getError().contains("java.lang.IndexOutOfBoundsException:")) {
-                String data = "Something went wrong. Please inform the system administrator.";
-                //type could be error, user_error, warning regarding the error occurred along the process
-                String type = error;
-                String result = defaultOutputFormat(data, type);
-                logErrorMessage(result);
-                encoder.write(ByteBuffer.wrap(result.getBytes()));
-                encoder.complete();
-                close();
             } else {
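+                // Unclassified engine error: log the raw message and return a
+                // generic response, since the details only help the administrator.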
+                log.info("Exception when running the query: " + queryStatus.getError());
                 String data = "Something went wrong. Please inform the system administrator.";
-                //type could be error, user_error, warning regarding the error occurred along the process
-                String type = error;
-                String result = defaultOutputFormat(data, type);
+                String result = HBPQueryHelper.ErrorResponse.createErrorResponse(data, error);
                 logErrorMessage(result);
                 encoder.write(ByteBuffer.wrap(result.getBytes()));
                 encoder.complete();
-                close();
             }
+            closeQuery();
+            close();
         }
     }
 
-    private String defaultOutputFormat(String data, String type) {
-        return "{\"result\" : [{\"data\":" + "\"" + data + "\",\"type\":" + "\"" + type + "\"}]}";
+    @Override
+    public boolean isRepeatable() {
+        return false;
     }
 
-    private void logErrorMessage(String error){
-        log.info("Algorithm exited with error and returned:\n " + error);
+    public void closeQuery() throws IOException {
+        queryStatus.close();
     }
 
     @Override
-    public void close() throws IOException {
-        queryStatus.close();
+    public void close() {
+
     }
 
-    @Override
-    public boolean isRepeatable() {
-        return false;
+    private void logErrorMessage(String error) {
+        log.info("Algorithm exited with error and returned:\n " + error);
     }
 }
diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/entity/NQueryStatusEntity.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/entity/NQueryStatusEntity.java
index e595c76897b30d0b92bf82c820ff70fb98c9c8ae..0db185cf98ab8e34f5343c9aaaf6466632227d3c 100644
--- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/entity/NQueryStatusEntity.java
+++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/async/handler/entity/NQueryStatusEntity.java
@@ -69,7 +69,7 @@ public class NQueryStatusEntity extends BasicHttpEntity implements HttpAsyncCont
     }
 
     @Override
-    public void close() throws IOException {
+    public void close() {
 
     }
 
@@ -84,7 +84,6 @@ public class NQueryStatusEntity extends BasicHttpEntity implements HttpAsyncCont
     @Override
     public void statusChanged(AdpDBQueryID queryID, AdpDBStatus status) {
         ioctl.requestOutput();
-
     }
 
     @Override
diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/control/handler/HttpAsyncCheckWorker.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/control/handler/HttpAsyncCheckWorker.java
index e5efed4443d1a6afd928ab1158875d756480823d..b8a0e9bae987d649d1ccf142f82ed20621a161a7 100644
--- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/control/handler/HttpAsyncCheckWorker.java
+++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/gateway/control/handler/HttpAsyncCheckWorker.java
@@ -9,11 +9,15 @@ import madgik.exareme.master.connector.DataSerialization;
 import madgik.exareme.master.engine.AdpDBManager;
 import madgik.exareme.master.engine.AdpDBManagerLocator;
 import madgik.exareme.master.gateway.ExaremeGatewayUtils;
+import madgik.exareme.master.gateway.async.handler.HBP.Exceptions.RequestException;
+import madgik.exareme.master.gateway.async.handler.HBP.Exceptions.BadUserInputException;
 import madgik.exareme.master.gateway.async.handler.entity.NQueryResultEntity;
-import madgik.exareme.master.queryProcessor.composer.*;
-import madgik.exareme.master.queryProcessor.composer.Exceptions.AlgorithmException;
-import madgik.exareme.master.queryProcessor.composer.Exceptions.CDEsMetadataException;
-import madgik.exareme.master.queryProcessor.composer.Exceptions.ComposerException;
+import madgik.exareme.master.queryProcessor.HBP.AlgorithmProperties;
+import madgik.exareme.master.queryProcessor.HBP.Algorithms;
+import madgik.exareme.master.queryProcessor.HBP.Composer;
+import madgik.exareme.master.queryProcessor.HBP.Exceptions.AlgorithmException;
+import madgik.exareme.master.queryProcessor.HBP.Exceptions.CDEsMetadataException;
+import madgik.exareme.master.queryProcessor.HBP.Exceptions.ComposerException;
 import madgik.exareme.worker.art.container.ContainerProxy;
 import madgik.exareme.worker.art.registry.ArtRegistryLocator;
 import org.apache.http.*;
@@ -23,12 +27,16 @@ import org.apache.http.protocol.HttpContext;
 import org.apache.log4j.Logger;
 
 import java.io.IOException;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
 
 public class HttpAsyncCheckWorker implements HttpAsyncRequestHandler<HttpRequest> {
     private static final Logger log = Logger.getLogger(madgik.exareme.master.gateway.control.handler.HttpAsyncCheckWorker.class);
     private static final AdpDBManager manager = AdpDBManagerLocator.getDBManager();
     private static final String algorithmName = "HEALTH_CHECK";
+
     public HttpAsyncCheckWorker() {
         super();
     }
@@ -36,61 +44,64 @@ public class HttpAsyncCheckWorker implements HttpAsyncRequestHandler<HttpRequest
     public HttpAsyncRequestConsumer<HttpRequest> processRequest(
             final HttpRequest request, final HttpContext context) {
-        // Buffer request content in memory for simplicity
-        return new BasicAsyncRequestConsumer();
-    }
+        // Buffer request content in memory for simplicity
+        return new BasicAsyncRequestConsumer();
+    }
+
     public void handle(
             final HttpRequest request, final HttpAsyncExchange httpexchange, final HttpContext context)
             throws HttpException, IOException {
-        HttpResponse response = httpexchange.getResponse();
+        HttpResponse response = httpexchange.getResponse();
         try {
             handleInternal(request, response, context);
-        } catch (AlgorithmException | CDEsMetadataException | ComposerException e) {
+        } catch (AlgorithmException | CDEsMetadataException | ComposerException | BadUserInputException | RequestException e) {
            e.printStackTrace();
        }
        httpexchange.submitResponse(new BasicAsyncResponseProducer(response));
-    }
+    }
+
    private void handleInternal(
-            final HttpRequest request,
-            final HttpResponse response,
-            final HttpContext context) throws HttpException, IOException, AlgorithmException, CDEsMetadataException, ComposerException {
+            final HttpRequest request,
+            final HttpResponse response,
+            final HttpContext context
+    ) throws HttpException, IOException, AlgorithmException, CDEsMetadataException, ComposerException, BadUserInputException, RequestException {
 
        String method = request.getRequestLine().getMethod().toUpperCase(Locale.ENGLISH);
        if (!method.equals("GET") && !method.equals("HEAD") && !method.equals("POST")) {
            throw new MethodNotSupportedException(method + " method not supported");
        }
 
-        AdpDBClientQueryStatus queryStatus;
-        String IP_MASTER = null;
-        String IP_WORKER = null;
+        String NODE_IP = null;
+        String NODE_NAME = null;
        DataSerialization ds = DataSerialization.summary;
 
-        String[] getIPs = request.getRequestLine().getUri().split("\\?");
+        String[] url = request.getRequestLine().getUri().split("\\?");
+        String[] urlParameters = url[1].split("&");
 
-        if (getIPs[1].split("=")[0].equals("IP_MASTER"))
-            IP_MASTER = getIPs[1].split("=")[1];
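+        // Expected query string: NODE_IP=<ip>&NODE_NAME=<name>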
+        if (urlParameters[0].split("=")[0].equals("NODE_IP"))
+            NODE_IP = urlParameters[0].split("=")[1];
 
-        if (getIPs[2].split("=")[0].equals("IP_WORKER"))
-            IP_WORKER = getIPs[2].split("=")[1];
+        if (urlParameters[1].split("=")[0].equals("NODE_NAME"))
+            NODE_NAME = urlParameters[1].split("=")[1];
 
-        log.debug("MASTER: " + IP_MASTER);
-        log.debug("WORKER: " + IP_WORKER);
+        // Execute HEALTH_CHECK algorithm for health checks in bootstrap.sh via "curl -s ${MASTER_IP}:9092/check/worker?NODE_IP=${NODE_IP}"
+        // Retrieve the json result and check whether the NODE_NAME of the node exists in the result.
+        String algorithmKey = algorithmName + "_" + System.currentTimeMillis();
+        log.info("Executing algorithm: " + algorithmName + " with key: " + algorithmKey);
+        log.info("Algorithm Nodes: ");
+        log.info(" IP: " + NODE_IP + " , NAME: " + NODE_NAME);
 
-        //Execute LIST_DATASET for health checks in bootsrap.sh via "curl -s ${MASTER_IP}:9092/check/worker?IP_MASTER=${MY_IP}?IP_WORKER=${MY_IP}"
-        //Retrieve json result and check of the NODE_NAME of the node exist in the result.
-        String algorithmKey = algorithmName + "_" + System.currentTimeMillis();
 
        String dfl;
        HashMap<String, String> inputContent = new HashMap<>();
 
        AlgorithmProperties algorithmProperties = Algorithms.getInstance().getAlgorithmProperties(algorithmName);
 
        if (algorithmProperties == null)
-            throw new AlgorithmException(algorithmName, "The algorithm does not exist.");
-
-        algorithmProperties.mergeAlgorithmParametersWithInputContent(inputContent);
+            throw new RequestException(algorithmName, "The algorithm does not exist.");
 
+        algorithmProperties.mergeWithAlgorithmParameters(inputContent);
 
-        dfl = Composer.composeDFLScript(algorithmKey, algorithmProperties, 2); //each time a Worker that try to connect with Exareme and the Master
+        dfl = Composer.composeDFLScript(algorithmKey, algorithmProperties, 1); // each time a Worker tries to connect with Exareme and the Master
        log.debug(dfl);
        try {
            Composer.persistDFLScriptToAlgorithmsDemoDirectory(
@@ -111,17 +122,7 @@ public class HttpAsyncCheckWorker implements HttpAsyncRequestHandler<HttpRequest
        ContainerProxy[] usedContainerProxies;
        List<ContainerProxy> usedContainerProxiesList = new ArrayList<>();
        List<String> nodesToBeChecked = new ArrayList<>();
-
-        if (Objects.equals(IP_MASTER, IP_WORKER)){
-            log.debug("It seams like a health check for Master node["+IP_MASTER+"] only");
-            nodesToBeChecked.add(IP_MASTER);
-        }
-        else {
-            log.debug("It seams like a health check for Master node["+IP_MASTER+"] - Worker node["+IP_WORKER+"]");
-            nodesToBeChecked.add(IP_MASTER);
-            nodesToBeChecked.add(IP_WORKER);
-        }
-
+        nodesToBeChecked.add(NODE_IP);
        for (ContainerProxy containerProxy : ArtRegistryLocator.getArtRegistryProxy().getContainers()) {
            if (nodesToBeChecked.contains(containerProxy.getEntityName().getIP())) {
                usedContainerProxiesList.add(containerProxy);
@@ -129,16 +130,19 @@ public class HttpAsyncCheckWorker implements HttpAsyncRequestHandler<HttpRequest
            }
        }
-
        usedContainerProxies = usedContainerProxiesList.toArray(new ContainerProxy[usedContainerProxiesList.size()]);
        clientProperties.setContainerProxies(usedContainerProxies);
        AdpDBClient dbClient = AdpDBClientFactory.createDBClient(manager, clientProperties);
-        queryStatus = dbClient.query(algorithmKey, dfl);
+
+        AdpDBClientQueryStatus queryStatus = dbClient.query(algorithmKey, dfl);
+        log.info("Executing algorithm " + algorithmKey +
+                " started with queryId " + queryStatus.getQueryID().getQueryID());
+
        BasicHttpEntity entity = new NQueryResultEntity(queryStatus, ds,
                ExaremeGatewayUtils.RESPONSE_BUFFER_SIZE);
        response.setStatusCode(HttpStatus.SC_OK);
        response.setEntity(entity);
    }
-}
+}
\ No newline at end of file
diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/AlgorithmProperties.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/AlgorithmProperties.java
similarity index 79%
rename from Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/AlgorithmProperties.java
rename to Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/AlgorithmProperties.java
index d04ce5385be571d01c5418a12f764b5aeb9fce7f..f25435f31664a25706926b7777ce19b91a01723e 100644
--- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/AlgorithmProperties.java
+++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/AlgorithmProperties.java
@@ -1,8 +1,8 @@
-package madgik.exareme.master.queryProcessor.composer;
+package madgik.exareme.master.queryProcessor.HBP;
 
-import madgik.exareme.master.queryProcessor.composer.Exceptions.AlgorithmException;
-import madgik.exareme.master.queryProcessor.composer.Exceptions.CDEsMetadataException;
-import madgik.exareme.master.queryProcessor.composer.Exceptions.ComposerException;
+import madgik.exareme.master.gateway.async.handler.HBP.Exceptions.BadUserInputException;
+import madgik.exareme.master.queryProcessor.HBP.Exceptions.AlgorithmException;
+import madgik.exareme.master.queryProcessor.HBP.Exceptions.CDEsMetadataException;
 import org.codehaus.jettison.json.JSONArray;
 import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONObject;
@@ -112,15 +112,14 @@ public class AlgorithmProperties {
      *
      * @param parameterName     the name of a parameter
      * @param newParameterValue the new value of the parameter
-     * @return true if it was changed, false otherwise
      */
-    public void setParameterValue(String parameterName, String newParameterValue) throws ComposerException {
+    public void setParameterValue(String parameterName, String newParameterValue) throws BadUserInputException {
         String allowedDynamicParameters = ComposerConstants.dbIdentifierKey;
 
         // Not all parameters are allowed to be changed.
         // This is a safety check
         if (!allowedDynamicParameters.contains(parameterName)) {
-            throw new ComposerException("The value of the parameter " + parameterName + " should not be set manually.");
+            throw new BadUserInputException("The value of the parameter " + parameterName + " should not be set manually.");
         }
 
         for (ParameterProperties parameter : parameters) {
@@ -129,39 +128,37 @@ public class AlgorithmProperties {
                 return;
             }
         }
-        throw new ComposerException("The parameter " + parameterName + " does not exist.");
+        throw new BadUserInputException("The parameter " + parameterName + " does not exist.");
     }
 
     /**
     * Gets the AlgorithmProperties from the cached Algorithms.
     * Merges the default algorithm properties with the parameters given in the HashMap.
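+     * For example, the request's "pathology" and "dataset" entries replace the
+     * corresponding declared parameter values before they are validated below.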
     *
-     * @param inputContent a HashMap with the properties from the request
-     * @return the merge algorithm's properties
-     * @throws AlgorithmException when algorithm's properties do not match the inputContent
+     * @param algorithmParameters a HashMap with the parameters from the request
+     * @throws AlgorithmException when algorithm's properties do not match the algorithmParameters
     */
-    public void mergeAlgorithmParametersWithInputContent(HashMap<String, String> inputContent)
-            throws AlgorithmException, CDEsMetadataException {
-        if (inputContent == null)
+    public void mergeWithAlgorithmParameters(HashMap<String, String> algorithmParameters)
+            throws AlgorithmException, CDEsMetadataException, BadUserInputException {
+        if (algorithmParameters == null)
            return;
 
-        String pathology = inputContent.get(ComposerConstants.getPathologyPropertyName());
+        String pathology = algorithmParameters.get(ComposerConstants.getPathologyPropertyName());
        for (ParameterProperties parameterProperties : this.getParameters()) {
-            String value = inputContent.get(parameterProperties.getName());
+            String value = algorithmParameters.get(parameterProperties.getName());
            if (value != null && !value.equals("")) {
                if (!parameterProperties.getValueMultiple() && value.contains(",")
                        && !parameterProperties.getValueType().equals(ParameterProperties.ParameterValueType.json)) {
-                    throw new AlgorithmException(name,
-                            "The value of the parameter '" + parameterProperties.getName()
-                                    + "' should contain only one value.");
+                    throw new BadUserInputException("The value of the parameter '" + parameterProperties.getName()
+                            + "' should contain only one value.");
                }
                validateAlgorithmParameterValueType(name, value, parameterProperties);
-                validateAlgorithmParameterType(name, value, parameterProperties, pathology);
+                validateAlgorithmParameterType(value, parameterProperties, pathology);
            } else {    // if value not given or it is blank
                if (parameterProperties.getValueNotBlank()) {
-                    throw new AlgorithmException(name,
+                    throw new BadUserInputException(
                            "The value of the parameter '" + parameterProperties.getName()
                                    + "' should not be blank.");
                }
@@ -178,42 +175,40 @@ public class AlgorithmProperties {
    /**
     * Checks if the given parameter input has acceptable values for that specific parameter.
     *
-     * @param algorithmName       the name of the algorithm
     * @param value               the value given as input
     * @param parameterProperties the rules that the value should follow
     * @param pathology           the pathology that the algorithm will run on
     */
    private static void validateAlgorithmParameterType(
-            String algorithmName,
            String value,
            ParameterProperties parameterProperties,
            String pathology
-    ) throws AlgorithmException, CDEsMetadataException {
+    ) throws CDEsMetadataException, BadUserInputException {
        // First we split in case we have multiple values.
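+        // e.g. a multi-valued input such as "age,gender" (illustrative CDE names)
+        // produces one singleValue per entry, each checked against the rules below.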
        String[] values = value.split(",");
        for (String singleValue : values) {
            if (parameterProperties.getType().equals(ParameterProperties.ParameterType.column)) {
-                validateCDEVariables(algorithmName, values, parameterProperties, pathology);
+                validateCDEVariables(values, parameterProperties, pathology);
            } else if (parameterProperties.getType().equals(ParameterProperties.ParameterType.formula)) {
                String[] formulaValues = singleValue.split("[+\\-*:0]+");
-                validateCDEVariables(algorithmName, formulaValues, parameterProperties, pathology);
+                validateCDEVariables(formulaValues, parameterProperties, pathology);
            }
            // If value is not a column (type=other) then check for min-max-enumerations
            else if (parameterProperties.getType().equals(ParameterProperties.ParameterType.other)) {
                if (parameterProperties.getValueType().equals(ParameterProperties.ParameterValueType.integer)
                        || parameterProperties.getValueType().equals(ParameterProperties.ParameterValueType.real)) {
                    if (parameterProperties.getValueMin() != null && Double.parseDouble(singleValue) < parameterProperties.getValueMin())
-                        throw new AlgorithmException(algorithmName, "The value(s) of the parameter '" + parameterProperties.getName()
+                        throw new BadUserInputException("The value(s) of the parameter '" + parameterProperties.getName()
                                + "' should be greater than " + parameterProperties.getValueMin() + " .");
                    if (parameterProperties.getValueMax() != null && Double.parseDouble(singleValue) > parameterProperties.getValueMax())
-                        throw new AlgorithmException(algorithmName, "The value(s) of the parameter '" + parameterProperties.getName()
+                        throw new BadUserInputException("The value(s) of the parameter '" + parameterProperties.getName()
                                + "' should be less than " + parameterProperties.getValueMax() + " .");
                } else if (parameterProperties.getValueType().equals(ParameterProperties.ParameterValueType.string)) {
                    if (parameterProperties.getValueEnumerations() == null) return;
                    List<String> enumerations = Arrays.asList(parameterProperties.getValueEnumerations());
                    if (!enumerations.contains(singleValue))
-                        throw new AlgorithmException(algorithmName, "The value '" + singleValue + "' of the parameter '" + parameterProperties.getName()
+                        throw new BadUserInputException("The value '" + singleValue + "' of the parameter '" + parameterProperties.getName()
                                + "' is not included in the valueEnumerations " + Arrays.toString(parameterProperties.getValueEnumerations()) + " .");
                }
            }
@@ -225,34 +220,32 @@ public class AlgorithmProperties {
     * the parameter property's columnValueType and columnValueCategorical.
     * The information about the CDEs are taken from the metadata.
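+     * For example, an illustrative variable "age" must exist in the pathology's
+     * metadata and match the allowed SQL type and isCategorical setting.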
     *
-     * @param algorithmName       the name of the algorithm
     * @param variables           a list with the variables
     * @param parameterProperties the rules that the variables should follow
     * @param pathology           the pathology that the algorithm will run on
     */
    private static void validateCDEVariables(
-            String algorithmName,
            String[] variables,
            ParameterProperties parameterProperties,
            String pathology
-    ) throws AlgorithmException, CDEsMetadataException {
+    ) throws CDEsMetadataException, BadUserInputException {
        CDEsMetadata.PathologyCDEsMetadata metadata = CDEsMetadata.getInstance().getPathologyCDEsMetadata(pathology);
        for (String curValue : variables) {
            if (!metadata.columnExists(curValue)) {
-                throw new AlgorithmException(algorithmName, "The CDE '" + curValue + "' does not exist.");
+                throw new BadUserInputException("The CDE '" + curValue + "' does not exist.");
            }
 
            String allowedSQLTypeValues = parameterProperties.getColumnValuesSQLType();
            String columnValuesSQLType = metadata.getColumnValuesSQLType(curValue);
            if (!allowedSQLTypeValues.contains(columnValuesSQLType) && !allowedSQLTypeValues.equals("")) {
-                throw new AlgorithmException(algorithmName, "The CDE '" + curValue + "' does not have one of the allowed SQL Types '"
+                throw new BadUserInputException("The CDE '" + curValue + "' does not have one of the allowed SQL Types '"
                        + allowedSQLTypeValues + "' for the algorithm.");
            }
 
            String allowedIsCategoricalValue = parameterProperties.getColumnValuesIsCategorical();
            String columnValuesIsCategorical = metadata.getColumnValuesIsCategorical(curValue);
            if (!allowedIsCategoricalValue.equals(columnValuesIsCategorical) && !allowedIsCategoricalValue.equals("")) {
-                throw new AlgorithmException(algorithmName, "The CDE '" + curValue + "' does not match the categorical value '"
+                throw new BadUserInputException("The CDE '" + curValue + "' does not match the categorical value '"
                        + allowedIsCategoricalValue + "' specified for the algorithm.");
            }
        }
@@ -269,7 +262,7 @@ public class AlgorithmProperties {
            String algorithmName,
            String value,
            ParameterProperties parameterProperties
-    ) throws AlgorithmException {
+    ) throws AlgorithmException, BadUserInputException {
        if (parameterProperties.getValueType().equals(ParameterProperties.ParameterValueType.json)) {
            try {
                new JSONObject(value);
@@ -291,19 +284,19 @@ public class AlgorithmProperties {
            try {
                Double.parseDouble(curValue);
            } catch (NumberFormatException nfe) {
-                throw new AlgorithmException(algorithmName,
+                throw new BadUserInputException(
                        "The value of the parameter '" + parameterProperties.getName() + "' should be a real number.");
            }
        } else if (parameterProperties.getValueType().equals(ParameterProperties.ParameterValueType.integer)) {
            try {
                Integer.parseInt(curValue);
            } catch (NumberFormatException e) {
-                throw new AlgorithmException(algorithmName,
+                throw new BadUserInputException(
                        "The value of the parameter '" + parameterProperties.getName() + "' should be an integer.");
            }
        } else if (parameterProperties.getValueType().equals(ParameterProperties.ParameterValueType.string)) {
            if (curValue.equals("")) {
-                throw new AlgorithmException(algorithmName,
+                throw new BadUserInputException(
                        "The value of the parameter '" + parameterProperties.getName() + "' contains an empty string.");
            }
diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Algorithms.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Algorithms.java
similarity index 67%
rename from Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Algorithms.java
rename to Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Algorithms.java
index 4401b90ace08b17b829a047f01f8d2e33711dd66..e3c8c3140c9dee3c4307ce2a9fa78fb31f2809db 100644
--- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Algorithms.java
+++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Algorithms.java
@@ -1,7 +1,8 @@
-package madgik.exareme.master.queryProcessor.composer;
+package madgik.exareme.master.queryProcessor.HBP;
 
 import com.google.gson.Gson;
-import madgik.exareme.master.queryProcessor.composer.Exceptions.AlgorithmException;
+import com.google.gson.JsonSyntaxException;
+import madgik.exareme.master.queryProcessor.HBP.Exceptions.AlgorithmException;
 import org.apache.log4j.Logger;
 
 import java.io.*;
@@ -14,8 +15,8 @@ import java.util.Objects;
  */
 public class Algorithms {
     private static Algorithms instance = null;
-    private HashMap<String, AlgorithmProperties> algorithmsHashMap;
-    private AlgorithmProperties[] algorithmsArray;
+    private final HashMap<String, AlgorithmProperties> algorithmsHashMap;
+    private final AlgorithmProperties[] algorithmsArray;
 
     private Algorithms(String repoPath) throws IOException, AlgorithmException {
         Gson gson = new Gson();
@@ -25,17 +26,18 @@ public class Algorithms {
         ArrayList<AlgorithmProperties> currentAlgorithms = new ArrayList<>();
         algorithmsHashMap = new HashMap<>();
 
-        for (File file : Objects.requireNonNull(repoFile.listFiles(new FileFilter() {
-            @Override
-            public boolean accept(File pathname) {
-                if (!pathname.isDirectory())
-                    return false;
-                return new File(pathname, "properties.json").exists();
-            }
+        for (File file : Objects.requireNonNull(repoFile.listFiles(pathname -> {
+            if (!pathname.isDirectory())
+                return false;
+            return new File(pathname, "properties.json").exists();
         }))) {
-            AlgorithmProperties algorithm = gson.fromJson(new BufferedReader(
-                    new FileReader(file.getAbsolutePath() + "/properties.json")), AlgorithmProperties.class);
-
+            AlgorithmProperties algorithm;
+            try {
+                algorithm = gson.fromJson(new BufferedReader(
+                        new FileReader(file.getAbsolutePath() + "/properties.json")), AlgorithmProperties.class);
+            } catch (JsonSyntaxException e) {
+                throw new AlgorithmException("Unknown", "Could not parse algorithms properly: " + e.getMessage());
+            }
             algorithm.validateAlgorithmPropertiesInitialization();
             algorithmsHashMap.put(algorithm.getName(), algorithm);
             currentAlgorithms.add(algorithm);
diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/CDEsMetadata.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/CDEsMetadata.java
similarity index 94%
rename from Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/CDEsMetadata.java
rename to Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/CDEsMetadata.java
index d6812d2cf580b15244f2cf5721f0f7de90ce2558..6dda2793159a4375f63d033f06e1da6af100c027 100644
--- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/CDEsMetadata.java
+++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/CDEsMetadata.java
@@ -1,12 +1,11 @@
-package madgik.exareme.master.queryProcessor.composer; +package madgik.exareme.master.queryProcessor.HBP; import java.io.File; import java.io.FileFilter; import java.sql.*; import java.util.HashMap; -import java.util.Set; -import madgik.exareme.master.queryProcessor.composer.Exceptions.CDEsMetadataException; +import madgik.exareme.master.queryProcessor.HBP.Exceptions.CDEsMetadataException; import org.apache.log4j.Logger; /** diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Composer.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Composer.java similarity index 98% rename from Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Composer.java rename to Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Composer.java index 251b5a38c6bf06502ca4722036b4d8eb10463dfc..a94c91e742c5ae4b9d10bbf851b7fe062fb5e854 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Composer.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Composer.java @@ -1,4 +1,4 @@ -package madgik.exareme.master.queryProcessor.composer; +package madgik.exareme.master.queryProcessor.HBP; import com.itfsw.query.builder.SqlQueryBuilderFactory; import com.itfsw.query.builder.support.builder.SqlBuilder; @@ -7,7 +7,8 @@ import madgik.exareme.common.consts.HBPConstants; import madgik.exareme.master.engine.iterations.handler.IterationsConstants; import madgik.exareme.master.engine.iterations.handler.IterationsHandlerDFLUtils; import madgik.exareme.master.engine.iterations.state.IterativeAlgorithmState; -import madgik.exareme.master.queryProcessor.composer.Exceptions.ComposerException; +import madgik.exareme.master.gateway.async.handler.HBP.Exceptions.BadUserInputException; +import madgik.exareme.master.queryProcessor.HBP.Exceptions.ComposerException; import madgik.exareme.utils.association.Pair; import madgik.exareme.utils.file.FileUtil; import madgik.exareme.utils.properties.AdpProperties; @@ -41,7 +42,7 @@ public class Composer { * @param algorithmProperties the properties of the algorithm * @return a query for the local database */ - private static String createLocalTableQuery(AlgorithmProperties algorithmProperties) throws ComposerException { + private static String createLocalTableQuery(AlgorithmProperties algorithmProperties) throws BadUserInputException { List<String> variables = new ArrayList<>(); List<String> datasets = new ArrayList<>(); String filters = ""; @@ -52,7 +53,7 @@ public class Composer { if (parameter.getType() == ParameterProperties.ParameterType.column) { for (String variable : Arrays.asList(parameter.getValue().split("[,]"))) { if (variables.contains(variable)) { - throw new ComposerException("Column '" + variable + "' was given twice as input. This is not allowed."); + throw new BadUserInputException("Column '" + variable + "' was given twice as input. This is not allowed."); } variables.add(variable); } @@ -75,7 +76,7 @@ public class Composer { } else if (parameter.getType() == ParameterProperties.ParameterType.dataset) { for (String dataset : Arrays.asList(parameter.getValue().split("[,]"))) { if (datasets.contains(dataset)) { - throw new ComposerException("Dataset '" + dataset + "' was given twice as input. 
This is not allowed."); + throw new BadUserInputException("Dataset '" + dataset + "' was given twice as input. This is not allowed."); } datasets.add(dataset); } @@ -132,7 +133,7 @@ public class Composer { String algorithmKey, AlgorithmProperties algorithmProperties, int numberOfWorkers - ) throws ComposerException { + ) throws BadUserInputException, ComposerException { // Assigning the proper identifier for the defaultDB // if the dbIdentifier is provided as a parameter or not String dbIdentifier = algorithmProperties.getParameterValue(ComposerConstants.dbIdentifierKey); @@ -484,7 +485,7 @@ public class Composer { String algorithmKey, AlgorithmProperties algorithmProperties, IterativeAlgorithmState.IterativeAlgorithmPhasesModel iterativeAlgorithmPhase - ) throws ComposerException { + ) throws BadUserInputException, ComposerException { if (iterativeAlgorithmPhase == null) throw new ComposerException("Unsupported iterative algorithm phase."); @@ -790,7 +791,7 @@ public class Composer { String algorithmKey, AlgorithmProperties algorithmProperties, IterativeAlgorithmState.IterativeAlgorithmPhasesModel iterativeAlgorithmPhase - ) throws ComposerException { + ) throws BadUserInputException, ComposerException { if (iterativeAlgorithmPhase == null) throw new ComposerException("Unsupported iterative algorithm phase."); @@ -1042,7 +1043,7 @@ public class Composer { return ComposerConstants.getAlgorithmFolderPath(algorithmName) + "/" + iterativeAlgorithmPhase.name() + "/" + iteration; } - private static File[] getLocalGlobalFolders(String algorithmFolderPath){ + private static File[] getLocalGlobalFolders(String algorithmFolderPath) { File[] localGlobalFolders = new File(algorithmFolderPath).listFiles(new FileFilter() { @Override public boolean accept(File pathname) { diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/ComposerConstants.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/ComposerConstants.java similarity index 98% rename from Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/ComposerConstants.java rename to Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/ComposerConstants.java index a5e250d0927579e956271334138a25245a307d20..a2cb3ada1b1b47db3ca90e7a334e4c90c49357af 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/ComposerConstants.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/ComposerConstants.java @@ -1,4 +1,4 @@ -package madgik.exareme.master.queryProcessor.composer; +package madgik.exareme.master.queryProcessor.HBP; import madgik.exareme.utils.properties.AdpProperties; diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Exceptions/AlgorithmException.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Exceptions/AlgorithmException.java similarity index 72% rename from Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Exceptions/AlgorithmException.java rename to Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Exceptions/AlgorithmException.java index b7ab1b764f5341e4ffbc604fcbef843f81be0cfd..7e240c422f9adb89d9ac245b289c6e170f2ebdc5 
100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Exceptions/AlgorithmException.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Exceptions/AlgorithmException.java @@ -1,4 +1,4 @@ -package madgik.exareme.master.queryProcessor.composer.Exceptions; +package madgik.exareme.master.queryProcessor.HBP.Exceptions; public class AlgorithmException extends Exception{ public AlgorithmException(String algorithmName, String message) { diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Exceptions/CDEsMetadataException.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Exceptions/CDEsMetadataException.java similarity index 65% rename from Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Exceptions/CDEsMetadataException.java rename to Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Exceptions/CDEsMetadataException.java index 51ecca7da7672d7cfbb36f917be1fbdf2b4f486e..3477e5efb11c11a1f55f4e6feb8cc06a1db130e6 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Exceptions/CDEsMetadataException.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Exceptions/CDEsMetadataException.java @@ -1,4 +1,4 @@ -package madgik.exareme.master.queryProcessor.composer.Exceptions; +package madgik.exareme.master.queryProcessor.HBP.Exceptions; public class CDEsMetadataException extends Exception { public CDEsMetadataException(String message) { diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Exceptions/ComposerException.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Exceptions/ComposerException.java similarity index 66% rename from Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Exceptions/ComposerException.java rename to Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Exceptions/ComposerException.java index c949b06d55163f996efe45daa48fc40f28592939..7113ce73e165429359fd20344cca6f6d0d846e6e 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/Exceptions/ComposerException.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/Exceptions/ComposerException.java @@ -1,4 +1,4 @@ -package madgik.exareme.master.queryProcessor.composer.Exceptions; +package madgik.exareme.master.queryProcessor.HBP.Exceptions; public class ComposerException extends Exception { public ComposerException(String message) { diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/ParameterProperties.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/ParameterProperties.java similarity index 97% rename from Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/ParameterProperties.java rename to Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/ParameterProperties.java 
index 41b62f08b044b53ba70e81000db1f9a94e5768bc..fd172ab2a8a080098f4778d4203417f142d4c730 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/composer/ParameterProperties.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/ParameterProperties.java @@ -1,6 +1,6 @@ -package madgik.exareme.master.queryProcessor.composer; +package madgik.exareme.master.queryProcessor.HBP; -import madgik.exareme.master.queryProcessor.composer.Exceptions.AlgorithmException; +import madgik.exareme.master.queryProcessor.HBP.Exceptions.AlgorithmException; public class ParameterProperties { private String name; diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/AlgorithmTestExecutionHelper.java b/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/AlgorithmTestExecutionHelper.java index 360cacc1750d94a4d2fba64f63fb3c8710802288..1dd3d1d05891fa762a3158e2623460a01db76496 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/AlgorithmTestExecutionHelper.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/AlgorithmTestExecutionHelper.java @@ -1,7 +1,7 @@ package madgik.exareme.master; import madgik.exareme.master.engine.iterations.IterationsTestGenericUtils; -import madgik.exareme.master.gateway.async.handler.HttpAsyncMiningQueryHandler; +import madgik.exareme.master.gateway.async.handler.HBP.HBPQueryHandler; import org.apache.commons.io.FileUtils; import org.apache.http.HeaderIterator; import org.apache.http.HttpEntity; @@ -299,12 +299,12 @@ public class AlgorithmTestExecutionHelper { else throw new NoSuchElementException("Header for cookies (" + SET_COOKIE_HEADER_NAME + ") does not contain \"" + COOKIE_ALGORITHM_EXECUTION_ID + "\" cookie.\n" - + "Maybe " + HttpAsyncMiningQueryHandler.class.getSimpleName() + + "Maybe " + HBPQueryHandler.class.getSimpleName() + "#handle function's code has been changed."); } else throw new NoSuchElementException("Header for cookies (" + SET_COOKIE_HEADER_NAME + ")" + " has not been set for testing. 
\nMaybe " - + HttpAsyncMiningQueryHandler.class.getSimpleName() + "#handle function's " + + HBPQueryHandler.class.getSimpleName() + "#handle function's " + "code has been changed to not \"echo\" the algorithm execution id cookie."); } diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/IterationsFunctionalTests.java b/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/IterationsFunctionalTests.java deleted file mode 100644 index 09cc3126579786d823896f00bf3f3eff57e34603..0000000000000000000000000000000000000000 --- a/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/IterationsFunctionalTests.java +++ /dev/null @@ -1,188 +0,0 @@ -package madgik.exareme.master.engine.iterations; - -import junit.framework.Assert; -import madgik.exareme.master.AlgorithmTestExecutionHelper; -import madgik.exareme.master.app.cluster.ExaremeCluster; -import madgik.exareme.master.app.cluster.ExaremeClusterFactory; -import madgik.exareme.master.gateway.ExaremeGateway; -import madgik.exareme.master.gateway.ExaremeGatewayFactory; -import org.apache.commons.lang.text.StrSubstitutor; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; - -import java.io.IOException; -import java.rmi.RemoteException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.concurrent.ExecutionException; - -/** - * Functional tests for iterations module. - * <p> - * To be used in conjunction with {@link AlgorithmTestExecutionHelper}. - * - * @author Christos Aslanoglou <br> caslanoglou@di.uoa.gr <br> University of Athens / Department of - * Informatics and Telecommunications. - */ -public class IterationsFunctionalTests { - private static final Logger log = Logger.getLogger(IterationsFunctionalTests.class); - - private static final String PLACEHOLDER_MAX_ITERATIONS = "placeholder_max_iterations"; - private static final String PLACEHOLDER_CONDITION_QUERY = "placeholder_query_condition"; - private static final String ALGORITHM_JSON_PARAMETERS = - "[{\"name\":\"iterations_max_number\",\"value\":\"${" - + PLACEHOLDER_MAX_ITERATIONS + "}\"}," + - "{\"name\":\"iterations_condition_query_provided\",\"value\":\"${" - + PLACEHOLDER_CONDITION_QUERY + "}\"}]"; - private static final String SELECT_OK_ITERATIVE = "SELECT_OK_ITERATIVE"; - private static final String SAMPLE_ITERATIVE = "SAMPLE_ITERATIVE"; - - - private HashMap<String, String> placeholdersMap; - private AlgorithmTestExecutionHelper executionTestsHelper; - - @BeforeClass - public static void setUp() throws Exception { - Logger.getRootLogger().setLevel(Level.ALL); - // These overwrites must happen before initializing any of the below iteration related - // fields (since they statically get an instance of Composer and thus force its - // initialization, which we want to avoid until having done the overwrites below). 
- IterationsTestGenericUtils.overwriteHBPConstantsDEMO_ALGOR_WORKDIR(); - IterationsTestGenericUtils.overwriteDemoRepositoryPathGatewayProperty(); - - final ExaremeCluster cluster = ExaremeClusterFactory.createMiniCluster(1098, 8088, 0); - cluster.start(); - final ExaremeGateway gateway = - ExaremeGatewayFactory.createHttpServer(cluster.getDBManager()); - gateway.start(); - Runtime.getRuntime().addShutdownHook(new Thread() { - @Override - public void run() { - gateway.stop(); - try { - cluster.stop(false); - } catch (RemoteException e) { - e.printStackTrace(); - } - } - }); - log.info("Mini cluster & gateway started."); - } - - @Before - public void SetUp() { - placeholdersMap = new HashMap<>(); - executionTestsHelper = new AlgorithmTestExecutionHelper(); - } - - @Test - public void ensureSelectOKResult() throws IOException, InterruptedException, ExecutionException { - final Integer executionId = submitSelectOkAlgorithm(2); - Assert.assertTrue( - executionTestsHelper.sendAlgorithmRequestAndWaitResponse(executionId)); - } - - @Test - public void ensureSampleIterativeResultStopDueMaxIterations() - throws IOException, InterruptedException, ExecutionException { - final Integer algorithmExecId = submitSampleIterativeAlgorithm(2); - Assert.assertTrue( - executionTestsHelper.sendAlgorithmRequestAndWaitResponse(algorithmExecId)); - } - - @Test - public void ensureSampleIterativeResultStopDueConditionQuery() - throws IOException, InterruptedException, ExecutionException { - final Integer algorithmExecId = submitSampleIterativeAlgorithm(10); - Assert.assertTrue( - executionTestsHelper.sendAlgorithmRequestAndWaitResponse(algorithmExecId)); - } - - @Test - public void testConcurrentSelectOkAlgorithm() throws IOException, InterruptedException { - ArrayList<Integer> algorithmExecutionIds = new ArrayList<>(); - - algorithmExecutionIds.add(submitSelectOkAlgorithm(1)); - algorithmExecutionIds.add(submitSelectOkAlgorithm(1)); - algorithmExecutionIds.add(submitSelectOkAlgorithm(2)); - - executionTestsHelper.sendConcurrentAlgorithmRequestsAndAwaitResponses(); - - for (Integer algorithmExecutionId : algorithmExecutionIds) { - Assert.assertTrue( - executionTestsHelper.getAlgorithmExecutionMessage(algorithmExecutionId), - executionTestsHelper.gotExpectedResponse(algorithmExecutionId)); - } - } - - @Test - public void testConcurrentIterativeAlgorithms() throws IOException, InterruptedException { - ArrayList<Integer> algorithmExecutionIds = new ArrayList<>(); - - // Algorithms preparation and submission - algorithmExecutionIds.add(submitSelectOkAlgorithm(1)); - algorithmExecutionIds.add(submitSampleIterativeAlgorithm(10)); - algorithmExecutionIds.add(submitSampleIterativeAlgorithm(2)); - - executionTestsHelper.sendConcurrentAlgorithmRequestsAndAwaitResponses(); - - for (Integer algorithmExecutionId : algorithmExecutionIds) { - Assert.assertTrue( - executionTestsHelper.getAlgorithmExecutionMessage(algorithmExecutionId), - executionTestsHelper.gotExpectedResponse(algorithmExecutionId)); - } - } - - - // Utilities ------------------------------------------------------------------------------ - private String prepareJSONParameters(String maxIterations, String conditionQueryProvided) { - placeholdersMap.put(PLACEHOLDER_MAX_ITERATIONS, maxIterations); - placeholdersMap.put(PLACEHOLDER_CONDITION_QUERY, conditionQueryProvided); - return StrSubstitutor.replace(ALGORITHM_JSON_PARAMETERS, placeholdersMap); - } - - - /** - * Wrapper for submitting a SelectOk algorithm with a given maximum iterations number. 
- * - * @return the algorithm's execution id - */ - private Integer submitSelectOkAlgorithm(int maxIterationsNumber) { - return executionTestsHelper.addAlgorithmExecutionTest( - SELECT_OK_ITERATIVE, - prepareJSONParameters( - String.valueOf(maxIterationsNumber), - "false"), - "{\"reply\":\"sufficient\"}"); - } - - /** - * Wrapper for submitting a SampleIterative algorithm with a given maximum iterations number. - * - * @return the algorithm's execution id - */ - private Integer submitSampleIterativeAlgorithm(int maxIterationsNumber) { - // Generate expected response - float sumValue = 0; - int currentIterationsNumber = 0; - // Write SampleIterative's termination condition to the respective Java Code - // so that we can generate the correct expected answer. - while (currentIterationsNumber < maxIterationsNumber && sumValue < 5) { - sumValue += currentIterationsNumber + 1; - currentIterationsNumber++; - } - - // currentIterationsNumber needs to be cast as float, since this is how it is retrieved - // from data source - return executionTestsHelper.addAlgorithmExecutionTest( - SAMPLE_ITERATIVE, - prepareJSONParameters(String.valueOf(maxIterationsNumber), "true"), - "{\"sum_value\":" + String.valueOf(sumValue) - + ",\"number_of_iterations\":" - + String.valueOf((float) currentIterationsNumber) - + "}"); - } -} diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/IterationsTestGenericUtils.java b/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/IterationsTestGenericUtils.java index a1f875050e85b07a5e7d62adc71872a8af9e4f41..8f7d53c7c71d20f5b598c6ef420f0dcb65a3b467 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/IterationsTestGenericUtils.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/IterationsTestGenericUtils.java @@ -2,9 +2,8 @@ package madgik.exareme.master.engine.iterations; import madgik.exareme.common.consts.HBPConstants; import madgik.exareme.master.engine.iterations.handler.IterationsConstants; -import madgik.exareme.master.queryProcessor.composer.AlgorithmProperties; -import madgik.exareme.master.queryProcessor.composer.Composer; -import madgik.exareme.master.queryProcessor.composer.ComposerConstants; +import madgik.exareme.master.queryProcessor.HBP.AlgorithmProperties; +import madgik.exareme.master.queryProcessor.HBP.ComposerConstants; import madgik.exareme.utils.properties.AdpProperties; import madgik.exareme.utils.properties.MutableProperties; diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/handler/IterationsHandlerTest.java b/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/handler/IterationsHandlerTest.java index 4cd96ebf1a67200c51e1fb8306898cb4e7475845..96686469560570cf5cdaa5d872710ed4bd2d9e0b 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/handler/IterationsHandlerTest.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/handler/IterationsHandlerTest.java @@ -7,8 +7,8 @@ import madgik.exareme.master.engine.iterations.IterationsTestGenericUtils; import madgik.exareme.master.engine.iterations.state.IterationsStateManager; import madgik.exareme.master.engine.iterations.state.IterationsStateManagerImpl; import 
madgik.exareme.master.engine.iterations.state.IterativeAlgorithmState; -import madgik.exareme.master.queryProcessor.composer.AlgorithmProperties; -import madgik.exareme.master.queryProcessor.composer.Algorithms; +import madgik.exareme.master.queryProcessor.HBP.AlgorithmProperties; +import madgik.exareme.master.queryProcessor.HBP.Algorithms; import madgik.exareme.worker.art.registry.ArtRegistryLocator; import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; @@ -52,29 +52,10 @@ public class IterationsHandlerTest { stateManager = IterationsStateManagerImpl.getInstance(); algorithmProperties = Algorithms.getInstance().getAlgorithmProperties(algorithmName); - algorithmProperties.mergeAlgorithmParametersWithInputContent( + algorithmProperties.mergeWithAlgorithmParameters( IterationsTestGenericUtils.prepareParameterProperties( algorithmName, "2")); } - // Functional tests ------------------------------------------------------------------------- - @Test - public void ensureIterativeAlgorithmIsSubmittedToStateManager() throws IOException { - String algorithmKey = IterationsTestGenericUtils.generateAlgorithmKey(algorithmProperties); - - final IterativeAlgorithmState ias = handler.handleNewIterativeAlgorithmRequest( - AdpDBManagerLocator.getDBManager(), algorithmKey, algorithmProperties, - ArtRegistryLocator.getArtRegistryProxy().getContainers()); - - final Map<String, IterativeAlgorithmState> iterativeAlgorithmMapping = - Whitebox.getInternalState(stateManager, "iterativeAlgorithmMapping"); - - TestCase.assertFalse(iterativeAlgorithmMapping.isEmpty()); - - // Remove generated files - FileUtils.deleteDirectory(new File( - HBPConstants.DEMO_ALGORITHMS_WORKING_DIRECTORY + "/" - + ias.getAlgorithmKey())); - } } diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/state/IterativeAlgorithmStateTest.java b/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/state/IterativeAlgorithmStateTest.java index 7eba89d3662cd82c2de52cac1e26f5715d9ea4b2..2aa0544d83cf5fb224ddae5f67718ea842d993db 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/state/IterativeAlgorithmStateTest.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/engine/iterations/state/IterativeAlgorithmStateTest.java @@ -9,9 +9,7 @@ import madgik.exareme.master.engine.AdpDBManager; import madgik.exareme.master.engine.AdpDBManagerLocator; import madgik.exareme.master.engine.iterations.handler.IterationsConstants; import madgik.exareme.master.engine.iterations.state.exceptions.IterationsStateFatalException; -import madgik.exareme.master.queryProcessor.composer.AlgorithmProperties; -import madgik.exareme.master.queryProcessor.composer.Algorithms; -import madgik.exareme.worker.art.executionEngine.dynamicExecutionEngine.Algorithm; +import madgik.exareme.master.queryProcessor.HBP.AlgorithmProperties; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; @@ -23,8 +21,6 @@ import org.powermock.core.classloader.annotations.PrepareForTest; import org.powermock.modules.junit4.PowerMockRunner; import org.powermock.reflect.Whitebox; -import java.util.HashMap; - import static org.powermock.api.mockito.PowerMockito.when; /** diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/queryProcessor/composer/AlgorithmsTest.java 
b/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/queryProcessor/HBP/AlgorithmsTest.java similarity index 91% rename from Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/queryProcessor/composer/AlgorithmsTest.java rename to Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/queryProcessor/HBP/AlgorithmsTest.java index 8ebb8ce641f6282ca1e11c210af7132af9950ff0..7a890840d4009072e8fb84122ca962bc9faff0f2 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/queryProcessor/composer/AlgorithmsTest.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/test/java/madgik/exareme/master/queryProcessor/HBP/AlgorithmsTest.java @@ -1,7 +1,6 @@ -package madgik.exareme.master.queryProcessor.composer; +package madgik.exareme.master.queryProcessor.HBP; import com.google.gson.Gson; -import madgik.exareme.master.engine.iterations.handler.IterationsConstants; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.junit.Before; diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/.gitignore b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/.gitignore similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/.gitignore rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/.gitignore diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LICENSE b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LICENSE similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LICENSE rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LICENSE diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/1/global.template.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/1/global.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/1/global.template.sql rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/1/global.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/1/local.template.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/1/local.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/1/local.template.sql rename to 
Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/1/local.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/2/global.template.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/2/global.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/2/global.template.sql rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/2/global.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/2/local.template.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/2/local.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/2/local.template.sql rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/2/local.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/3/global.template.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/3/global.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/3/global.template.sql rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/3/global.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/3/local.template.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/3/local.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/3/local.template.sql rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/3/local.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/4/global.template.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/4/global.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/4/global.template.sql rename to 
Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/4/global.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/4/local.template.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/4/local.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/4/local.template.sql rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/4/local.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/properties.json b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/properties.json similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LINEAR_REGRESSION/properties.json rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LINEAR_REGRESSION/properties.json diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LIST_DATASET/global.template.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LIST_DATASET/global.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LIST_DATASET/global.template.sql rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LIST_DATASET/global.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LIST_DATASET/local.template.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LIST_DATASET/local.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LIST_DATASET/local.template.sql rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LIST_DATASET/local.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LIST_DATASET/properties.json b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LIST_DATASET/properties.json similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LIST_DATASET/properties.json rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LIST_DATASET/properties.json diff --git 
a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LIST_VARIABLES/local.template.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LIST_VARIABLES/local.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LIST_VARIABLES/local.template.sql rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LIST_VARIABLES/local.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LIST_VARIABLES/properties.json b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LIST_VARIABLES/properties.json similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/LIST_VARIABLES/properties.json rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/LIST_VARIABLES/properties.json diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/README.md b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/README.md similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/README.md rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/README.md diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/VARIABLES_HISTOGRAM/1/CreateInputData.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/VARIABLES_HISTOGRAM/1/CreateInputData.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/VARIABLES_HISTOGRAM/1/CreateInputData.sql rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/VARIABLES_HISTOGRAM/1/CreateInputData.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/VARIABLES_HISTOGRAM/1/global.template.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/VARIABLES_HISTOGRAM/1/global.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/VARIABLES_HISTOGRAM/1/global.template.sql rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/VARIABLES_HISTOGRAM/1/global.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/VARIABLES_HISTOGRAM/1/local.template.sql 
b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/VARIABLES_HISTOGRAM/1/local.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/VARIABLES_HISTOGRAM/1/local.template.sql rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/VARIABLES_HISTOGRAM/1/local.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/VARIABLES_HISTOGRAM/2/global.template.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/VARIABLES_HISTOGRAM/2/global.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/VARIABLES_HISTOGRAM/2/global.template.sql rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/VARIABLES_HISTOGRAM/2/global.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/VARIABLES_HISTOGRAM/2/local.template.sql b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/VARIABLES_HISTOGRAM/2/local.template.sql similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/VARIABLES_HISTOGRAM/2/local.template.sql rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/VARIABLES_HISTOGRAM/2/local.template.sql diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/VARIABLES_HISTOGRAM/properties.json b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/VARIABLES_HISTOGRAM/properties.json similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/VARIABLES_HISTOGRAM/properties.json rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/VARIABLES_HISTOGRAM/properties.json diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/[HBP] Manual for algorithm developers.html b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/[HBP] Manual for algorithm developers.html similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/mip-algorithms/[HBP] Manual for algorithm developers.html rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/mip-algorithms/[HBP] Manual for algorithm developers.html diff --git a/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/properties.json 
b/Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/properties.json similarity index 100% rename from Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/composer/properties.json rename to Exareme-Docker/src/exareme/exareme-master/src/test/resources/madgik/exareme/master/queryProcessor/HBP/properties.json diff --git a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/aggregate/highchartsvisualizations.py b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/aggregate/highchartsvisualizations.py deleted file mode 100644 index e83160a32374b0a6556691c4de277114c668dbe2..0000000000000000000000000000000000000000 --- a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/aggregate/highchartsvisualizations.py +++ /dev/null @@ -1,89 +0,0 @@ -from array import * - -try: - from collections import OrderedDict -except ImportError: - # Python 2.6 - from lib.collections26 import OrderedDict - -__docformat__ = 'reStructuredText en' - - -class highchartheatmap: - # i, j, val, title - # - # ''' chart: { type: 'heatmap', marginTop: 40, marginBottom: 80, plotBorderWidth: 1 }, - # title: { text: 'Sales per employee per weekday' }, - # xAxis: { categories: ['Alexander', 'Marie', 'Maximilian']}, - # yAxis: { categories: ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'], title: null }, - # colorAxis: { min: 0, minColor: '#FFFFFF', maxColor: Highcharts.getOptions().colors[0] }, - # legend: { align: 'right',layout: 'vertical', margin: 0, verticalAlign: 'top', y: 25, symbolHeight: 280 }, - # tooltip: { formatter: function () { - # return '<b>(' + this.series.xAxis.categories[this.point.x] + ", "+ this.series.yAxis.categories[this.point.y]+')= '+ this.point.value + '</b>'; - # } - # }, - # series: [{ - # borderWidth: 1, - # data: [[0, 0, 10], [0, 1, 19], [0, 2, 8], [0, 3, 24], [0, 4, 67], [1, 0, 92], [1, 1, 58], [1, 2, 78], [1, 3, 117], [1, 4, 48], [2, 0, 35], [2, 1, 15], [2, 2, 123], [2, 3, 64], [2, 4, 52]], - # dataLabels: { - # enabled: true, - # color: '#000000' - # } - # }] - # ''' - registered = True # Value to define db operator - - def __init__(self): - self.n = 0 - self.xcategories = [] - self.ycategories = [] - self.mydata = [] - - def step(self, *args): - try: - if str(args[0]) not in self.xcategories: - self.xcategories.append(str(args[0])) - if str(args[1]) not in self.ycategories: - self.ycategories.append(str(args[1])) - - self.mydata.append( - [self.xcategories.index(str(args[0])), self.ycategories.index(str(args[1])), float(args[2])]) - self.title = str(args[3]) - self.xtitle = str(args[4]) - self.ytitle = str(args[5]) - except (ValueError, TypeError): - raise - - def final(self): - # print "self.xcategories", self.xcategories - # print "self.ycategories", self.ycategories - yield ('highchartheatmap',) - # print self.mydata - myresult = "chart: { type: 'heatmap', marginTop: 40, marginBottom: 80, plotBorderWidth: 1 }," - myresult += " title: { text: '" + self.title + "' }," - myresult += " xAxis: { categories: " + str(self.xcategories) + "}," - myresult += " yAxis: { categories: " + str(self.ycategories) + "}," - myresult += " colorAxis: { min: 0, minColor: '#FFFFFF', maxColor: Highcharts.getOptions().colors[0] }," - myresult += " legend: { align: 'right',layout: 'vertical', margin: 0, verticalAlign: 'top', y: 25, symbolHeight: 280 }," - # myresult += " tooltip: { formatter: function () {return '<b>(' + this.series.xAxis.categories[this.point.x] + ", "+ 
this.series.yAxis.categories[this.point.y]+')= '+ this.point.value + '</b>';}},"
-        myresult += " series: [{ borderWidth: 1, data: "
-        myresult += str(self.mydata)
-        myresult += ", dataLabels: { enabled: true,color: '#000000'}}]"
-        yield (myresult,)
-
-
-if not ('.' in __name__):
-    """
-    This is needed to be able to test the function, put it at the end of every
-    new function you create
-    """
-    import sys
-    from functions import *
-
-    testfunction()
-    if __name__ == "__main__":
-        reload(sys)
-        sys.setdefaultencoding('utf-8')
-        import doctest
-
-        doctest.testmod()
diff --git a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/row/errorchecking.py b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/row/errorchecking.py
index ff1a22fe865d9d0775c0a73157fc1aae2750c62b..84e7a8aa50fb2dbfd7208201f5f67487dacfdb1c 100644
--- a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/row/errorchecking.py
+++ b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/row/errorchecking.py
@@ -58,6 +58,15 @@ def holdoutvalidation_inputerrorchecking2(train_size, test_size):
 holdoutvalidation_inputerrorchecking2.registered = True
 
+def naive_bayes_training_inputerrorchecking(colname,noLevels):
+    if (noLevels < 2):
+        raise functions.OperatorError("ExaremeError", colname + ": should contain at least two distinct values")
+    else:
+        return "OK"
+
+naive_bayes_training_inputerrorchecking.registered = True
+
+
 # def maxnumberofiterations_errorhandling(maxnumberofiterations,no): # For most of the iterative algorithms
 #     if maxnumberofiterations< no:
 #         raise functions.OperatorError("ExaremeError", "The algorithm could not complete in the max number of iterations given. Please increase the iterations_max_number and try again.")
@@ -86,6 +95,16 @@ def histograms_inputerrorchecking(xisCategorical,bins):
 histograms_inputerrorchecking.registered = True
 
+def pairedttest_inputerrorchecking(variableNo):
+    if variableNo%2 == 1:
+        raise functions.OperatorError("ExaremeError", "The input should be in the form of y1-y2,y3-y4,... Therefore the number of variables should be even.")
+    else:
+        return "OK"
+
+pairedttest_inputerrorchecking.registered = True
+
+
+
 if not ('.'
in __name__): """ This is needed to be able to test the function, put it at the end of every diff --git a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/categoricalcoding.py b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/categoricalcoding.py index 694183ced7d1ac23b32b19fe8169291ea7e37c0d..fb802dda725df27a7d9a2684fce28b2b4e172bcc 100644 --- a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/categoricalcoding.py +++ b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/categoricalcoding.py @@ -43,10 +43,10 @@ class categoricalcoding(functions.vtable.vtbase.VT): colval = myrow[i] if colname in metadata.keys(): - print colname,colval + #print colname,colval newcolvals = metadata[colname].split(',') nvals =len(newcolvals) - print nvals + #print nvals # newcolvals.sort() for v in xrange(0,len(newcolvals)): newcolval =newcolvals[v] @@ -73,8 +73,8 @@ class categoricalcoding(functions.vtable.vtbase.VT): else: # gia mh categorical columns: newSchema.append(str(colname)) newrow.append(colval) - print "oldrow",myrow - print "newrow",newrow + # print "oldrow",myrow + # print "newrow",newrow if noRow == 0: yield tuple((x,) for x in newSchema) diff --git a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/highchartheatmap.py b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/highchartheatmap.py new file mode 100644 index 0000000000000000000000000000000000000000..c8bfb131d0ce710c44cfe437cf2abf0bebed4b65 --- /dev/null +++ b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/highchartheatmap.py @@ -0,0 +1,89 @@ +import setpath +import functions +import json +registered=True + +''' +Highcharts.chart('container', + { "chart": {"type": "heatmap","marginTop": 40,"marginBottom": 80,"plotBorderWidth": 1}, + "title": {"text": " confusion matrix "}, + "xAxis": {"title": { "text": " actual values "},"categories": [ "AD","CN","Other"]}, + "yAxis": {"title": { "text": " predicted values "},"categories": [ "AD", "CN", "Other"]}, + "colorAxis": {"min": 0,"minColor": "#FFFFFF","maxColor": "#6699ff"}, + "legend": {"align": "right","layout": "vertical","margin": 0,"verticalAlign": "top","y": 25,"symbolHeight": 280}, + "series": [{ "borderWidth": 1, "data": [ [ 0, 0, 46], + [ 0, 1, 39], + [ 0, 2, 0], + [ 1, 0, 20], + [ 1, 1,76], + [ 1, 2, 0], + [2, 0, 26], + [ 2, 1,33], + [2, 2,0]], + "dataLabels": {"enabled": true,"color": "#000000" }}]} +); +''' +class highchartheatmap(functions.vtable.vtbase.VT): + def VTiter(self, *parsedArgs,**envars): + largs, dictargs = self.full_parse(parsedArgs) + + if 'query' not in dictargs: + raise functions.OperatorError(__name__.rsplit('.')[-1],"No query argument ") + query = dictargs['query'] + if 'title' not in dictargs: + raise functions.OperatorError(__name__.rsplit('.')[-1],"No title argument ") + if 'xtitle' not in dictargs: + raise functions.OperatorError(__name__.rsplit('.')[-1],"No xtitle argument ") + if 'ytitle' not in dictargs: + raise functions.OperatorError(__name__.rsplit('.')[-1],"No ytitle argument ") + + cur = envars['db'].cursor() + c=cur.execute(query) + schema = cur.getdescriptionsafe() + + mydata = [] + xcategories = [] + ycategories = [] + + for myrow in c: + if str(myrow[0]) not in xcategories: + xcategories.append(str(myrow[0])) + if str(myrow[1]) not in ycategories: + ycategories.append(str(myrow[1])) + mydata.append([xcategories.index(str(myrow[0])), ycategories.index(str(myrow[1])), float(myrow[2])]) 
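+            # Each appended cell is a [x_index, y_index, value] triplet, the data
+            # format a Highcharts heatmap series expects (see the docstring above);
+            # axis categories are collected in first-seen order from the first two
+            # columns of the query result.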
+
+        myresult = {
+            "type" : "application/vnd.highcharts+json",
+            "data" :{ "chart": {"type": "heatmap","marginTop": 40,"marginBottom": 80,"plotBorderWidth": 1},
+                "title": {"text": str(dictargs['title'])},
+                "xAxis": {"title": { "text":str(dictargs['xtitle'])},"categories": xcategories},
+                "yAxis": {"title": { "text":str(dictargs['ytitle'])},"categories": ycategories},
+                "colorAxis": {"min": 0,"minColor": "#FFFFFF","maxColor": "#6699ff"},
+                "legend": {"align": "right","layout": "vertical","margin": 0,"verticalAlign": "top","y": 25,"symbolHeight": 280},
+                "series": [{ "borderWidth": 1, "data": mydata,
+                    "dataLabels": {"enabled": True,"color": "#000000" }}]
+            }
+        }
+        myjsonresult = json.dumps(myresult)
+        yield [('highchartresult',)]
+        yield (myjsonresult,)
+
+
+def Source():
+    return functions.vtable.vtbase.VTGenerator(highchartheatmap)
+
+
+if not ('.' in __name__):
+    """
+    This is needed to be able to test the function, put it at the end of every
+    new function you create
+    """
+    import sys
+    import setpath
+    from functions import *
+    testfunction()
+    if __name__ == "__main__":
+        reload(sys)
+        sys.setdefaultencoding('utf-8')
+        import doctest
+        doctest.testmod()
diff --git a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/rconfusionmatrixtable.py b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/rconfusionmatrixtable.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea8805dc2c19538b1d3e430f9402faad31515af1
--- /dev/null
+++ b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/rconfusionmatrixtable.py
@@ -0,0 +1,157 @@
+"""
+
+"""
+import setpath
+import functions
+import json
+import sys
+from rpy2.robjects import StrVector
+from rpy2.robjects.packages import importr
+from rpy2.rinterface import RRuntimeError
+
+import warnings
+warnings.filterwarnings("ignore")
+
+caret = importr('caret')
+e = importr('e1071')
+base = importr('base')
+
+### Classic stream iterator
+registered=True
+
+class rconfusionmatrixtable(functions.vtable.vtbase.VT): #predictedclass,actualclass,val
+    def VTiter(self, *parsedArgs, **envars):
+        largs, dictargs = self.full_parse(parsedArgs)
+
+        if 'query' not in dictargs:
+            raise functions.OperatorError(__name__.rsplit('.')[-1], "No query argument")
+        query = dictargs['query']
+
+        cur = envars['db'].cursor()
+        c = cur.execute(query)
+
+        predictedclasses =[]
+        actualclasses = []
+        classnames = []
+        for myrow in c:
+            for i in xrange(myrow[2]):
+                predictedclasses.append(myrow[0])
+                actualclasses.append(myrow[1])
+            if myrow[0] not in classnames:
+                classnames.append(myrow[0])
+
+        numberofclassnames = len(classnames)
+
+        print "Predicted vector:", predictedclasses
+        print "Actual vector:", actualclasses
+
+        #print (classnames)
+        predictedData = base.factor(base.c(StrVector(predictedclasses)), base.c(StrVector(classnames)))
+        truthData = base.factor(base.c(StrVector(actualclasses)), base.c(StrVector(classnames)))
+        Rresult = caret.confusionMatrix(predictedData,truthData)
+        print 'Rresult[1]', Rresult[1]
+        print 'Rresult[2]', Rresult[2]
+        print 'Rresult[3]', Rresult[3]
+
+        #####################################################
+        dataOverall = []
+        if numberofclassnames == 2:
+            dataOverall.append(["Positive Class",Rresult[0][0]])
+        else:
+            dataOverall.append(["Positive Class",None])
+
+        #Rresult[1] -->Table (I have already computed this)
+        #Rresult[2] -->overall statistics
+        dataOverall.append(["Accuracy",(Rresult[2][0])])
+        dataOverall.append(["Kappa",(Rresult[2][1])])
+
dataOverall.append(["Accuracy Lower",(Rresult[2][2])]) + dataOverall.append(["Accuracy Upper",(Rresult[2][3])]) + dataOverall.append(["Accuracy Null",(Rresult[2][4])]) + dataOverall.append(["Accuracy P Value",(Rresult[2][5])]) + dataOverall.append(["Mcnemar P Value",(Rresult[2][6])]) + + ResultOverall = { "data": { + "profile": "tabular-data-resource", + "data": dataOverall, + "name": "Overall Statistic Results", + "schema": { + "fields": [ + { + "type": "text", + "name": "Statistic Name" + }, + { + "type": "real", + "name": "Value" + } + ] + } + }, + "type": "application/vnd.dataresource+json" + } + print "ResultOverall", ResultOverall + ##################################################### + + FieldClassNames = [ + { "type": "text", + "name": "Statistic Name" }] + for i in range(len(classnames)): + FieldClassNames.append( + { + "type": "real", + "name": classnames[i] + " class" + }) + + DataClassNames = [["Sensitivity"],["Specificity"],["Pos Pred Value"],["Neg Pred Value"],["Precision"],["Recall"], + ["F1"],["Prevalence"],["Detection Rate"],["Detection Prevalence"],["Balanced Accuracy"]] + + #Rresult[3] -->byClass statistics + + i = 0 + for k in range(len(DataClassNames)): + for l in range(len(classnames)): + if str(Rresult[3][i])!='nan' and str(Rresult[3][i])!='NA': + DataClassNames[k].append(Rresult[3][i]) + else: + DataClassNames[k].append(None) + i = i + 1 + + ResultClassNames = { + "data": { + "profile": "tabular-data-resource", + "data": DataClassNames, + "name": "Statistic Results per Class", + "schema": {"fields": FieldClassNames} + }, + "type": "application/vnd.dataresource+json"} + + print "resultClassNames", ResultClassNames + + yield (['statscolname'],['statsval'],) + + a = json.dumps(ResultOverall) + #a = a.replace(' ','') + yield ("ResultOverall" , a) + + b = json.dumps(ResultClassNames) + #b = b.replace(' ','') + yield ("ResultClassNames",b) + + +def Source(): + return functions.vtable.vtbase.VTGenerator(rconfusionmatrixtable) + +if not ('.' 
in __name__): + """ + This is needed to be able to test the function, put it at the end of every + new function you create + """ + import sys + import setpath + from functions import * + testfunction() + if __name__ == "__main__": + reload(sys) + sys.setdefaultencoding('utf-8') + import doctest + doctest.testmod() diff --git a/Exareme-Docker/src/exareme/exareme-utils/src/main/resources/arm.properties b/Exareme-Docker/src/exareme/exareme-utils/src/main/resources/arm.properties index c6f19b45cdf855bf3c5561f6058dfc48beb5407b..3bb9d8f327595ff3b0a749947914f9f859894b61 100644 --- a/Exareme-Docker/src/exareme/exareme-utils/src/main/resources/arm.properties +++ b/Exareme-Docker/src/exareme/exareme-utils/src/main/resources/arm.properties @@ -5,7 +5,7 @@ # -------------------------------------------------------------------------------- # arm.rmi.RmiResourceMediatorProxy.lifetime=30000 arm.rmi.RmiResourceMediator.lifetime=30000 -arm.compute.rmi.RmiArmCompute.lifetime=30000 +arm.compute.rmi.RmiArmCompute.lifetime=100000 arm.cloud.OpenNebulaResourceMediatorInterface.ip= arm.cloud.OpenNebulaResourceMediatorInterface.port= # Storage diff --git a/Exareme-Docker/src/exareme/exareme-utils/src/main/resources/art.properties b/Exareme-Docker/src/exareme/exareme-utils/src/main/resources/art.properties index 14509f66817e0c6c28391cbb291b8857ae42adfd..6335f0fcdcb833b8f34a04ec27063479b3641db9 100644 --- a/Exareme-Docker/src/exareme/exareme-utils/src/main/resources/art.properties +++ b/Exareme-Docker/src/exareme/exareme-utils/src/main/resources/art.properties @@ -5,11 +5,6 @@ # -------------------------------------------------------------------------------- # art.rmi.retryTimes=10 art.rmi.retryPeriod_sec=3 -# Log -# -------------------------------------------------------------------------------- # -art.log.level=INFO -#art.log.level one of (DEBUG/INFO/ERROR). Mind the fact that art.log.level effects how fast or slow the worker starts. -#For example, DEBUG causes worker to slowly get up and running while INFO is more instant # Registry # -------------------------------------------------------------------------------- # art.registry.rmi.defaultPort=1099 @@ -21,7 +16,7 @@ art.registry.rmi.RmiArtRegistryProxy.cacheSize=10 # -------------------------------------------------------------------------------- # art.container.rmi.RmiContainer.defaultID=0 art.container.thread.OperatorMonitorThread.IsAlivePeriod=1 -art.container.rmi.RmiContainer.lifetime=30000 +art.container.rmi.RmiContainer.lifetime=100000 art.container.maxStatusReportPeriod=1000 # Adaptor implementation [rmi, socket] (socket is very efficient with big data). 
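# (presumably the socket adaptor streams tuples directly over TCP and avoids per-call RMI overhead)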
art.container.adaptor.impl=socket @@ -38,8 +33,8 @@ art.container.zipBufferSize_kb=16 art.container.ioBufferSize_kb=512 # Execution engine # -------------------------------------------------------------------------------- # -art.executionEngine.rmi.RmiExecutionEngine.lifetime=30000 -art.executionEngine.forceSessionStopAfter_sec=10 +art.executionEngine.rmi.RmiExecutionEngine.lifetime=100000 +art.executionEngine.forceSessionStopAfter_sec=30 art.executionEngine.dataOperators.memory=2 # centralized = Use a single node # distributed = Use workers given the schedule diff --git a/Exareme-Docker/src/exareme/exareme-utils/src/main/resources/db.properties b/Exareme-Docker/src/exareme/exareme-utils/src/main/resources/db.properties index b29003bbb77899388bb1747241f33eff35b50e4b..55d641d1e3f19a14c90540e3dc4b2e68a0007fdf 100644 --- a/Exareme-Docker/src/exareme/exareme-utils/src/main/resources/db.properties +++ b/Exareme-Docker/src/exareme/exareme-utils/src/main/resources/db.properties @@ -59,7 +59,7 @@ db.optimizer.scheduler.whatif.algs=[] # ---------------------------------------------------------------------------- # db.client.statisticsBuckets=10 # The frequency of the update (in seconds). -db.client.statisticsUpdate_sec=2 +db.client.statisticsUpdate_sec=1 # Force exit after the specified number of seconds. db.client.forceExit_sec=3 # off diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/admin/StartWorker.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/admin/StartWorker.java index 02cbd0aa67f2a25a7ef11922335e3a1e32f359fb..0ee98414b31d933b052bec969655c63d8de63eae 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/admin/StartWorker.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/admin/StartWorker.java @@ -30,7 +30,7 @@ public class StartWorker { String masterRegistryIP = args[0]; int registryPort = AdpProperties.getArtProps().getInt("art.registry.rmi.defaultPort"); int dataTransferPort = AdpProperties.getArtProps().getInt("art.container.data.port"); - String logLevel = AdpProperties.getArtProps().getString("art.log.level"); + String logLevel = System.getenv("LOG_LEVEL"); Logger.getRootLogger().setLevel(Level.toLevel(logLevel)); manager = ArtManagerFactory.createRmiArtManager( diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/arm/compute/rmi/RmiArmCompute.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/arm/compute/rmi/RmiArmCompute.java index a504ebe443558b7853d104d515b77bc3147691c5..33d05976c713b01a31f2ff4994bf391eb143c005 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/arm/compute/rmi/RmiArmCompute.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/arm/compute/rmi/RmiArmCompute.java @@ -63,10 +63,11 @@ public class RmiArmCompute extends RmiRemoteObject<ArmComputeProxy> implements A long lifeTime = AdpProperties.getArmProps().getLong("arm.compute.rmi.RmiArmCompute.lifetime"); - registryUpdateDeamon = RegistryUpdateDeamonFactory.createDeamon(this.createProxy(), (long) (0.75 * lifeTime)); - registryUpdateDeamon.startDeamon(); + if(lifeTime != 0) { + registryUpdateDeamon.startDeamon(); + } } @Override diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/arm/compute/rmi/RmiArmComputeProxy.java 
b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/arm/compute/rmi/RmiArmComputeProxy.java index 90cff2dc0111f0c9eb4f14291b7f82d5bf5d044c..17a316e72bf99a41469583080f8a34680b1340d9 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/arm/compute/rmi/RmiArmComputeProxy.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/arm/compute/rmi/RmiArmComputeProxy.java @@ -35,7 +35,11 @@ public class RmiArmComputeProxy extends RmiObjectProxy<ArmCompute> implements Ar long lifeTime = AdpProperties.getArmProps().getLong("arm.compute.rmi.RmiArmCompute.lifetime"); - this.registerPolicy = PolicyFactory.generateTimeExpirationDeletePolicy(lifeTime); + if(lifeTime == 0) { + this.registerPolicy = PolicyFactory.generateNoExpirationPolicy(); + }else { + this.registerPolicy = PolicyFactory.generateTimeExpirationDeletePolicy(lifeTime); + } } @Override diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/ContainerSession.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/ContainerSession.java index 3fdd537c37c312391ddde463197cb9115cef1ef6..d7883a37e9b8f6ef2514f5791c4a16d8249a4625 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/ContainerSession.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/ContainerSession.java @@ -5,6 +5,8 @@ package madgik.exareme.worker.art.container; import madgik.exareme.common.art.ContainerSessionID; import madgik.exareme.common.art.PlanSessionID; +import madgik.exareme.worker.art.container.rmi.RmiContainer; +import org.apache.log4j.Logger; import java.io.Serializable; import java.rmi.RemoteException; @@ -19,6 +21,7 @@ import java.rmi.RemoteException; * @since 1.0 */ public class ContainerSession implements Serializable { + private static final Logger log = Logger.getLogger(ContainerSession.class); private static final long serialVersionUID = 1L; private ContainerSessionID containerSessionID = null; @@ -42,7 +45,13 @@ public class ContainerSession implements Serializable { public ContainerJobResults execJobs(ContainerJobs jobs) throws RemoteException { jobs.setSession(containerSessionID, sessionID); - return containerProxy.getRemoteObject().execJobs(jobs); + log.debug("Executing " + jobs.getJobs().size() + " Jobs!"); + for (ContainerJob job : jobs.getJobs()) { + log.debug("Job: " + job.getType().name() + " " + job.toString()); + } + ContainerJobResults results = containerProxy.getRemoteObject().execJobs(jobs); + log.debug("Returning results for jobs from sessionID: " + sessionID.getLongId()); + return results; } public void closeSession() throws RemoteException { diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/SessionBased.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/SessionBased.java index 97894016ab415cb1ce205148e94e46b7044caecd..ecc8a7a22e7eccc9bfd68cd93a96e057b0337e78 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/SessionBased.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/SessionBased.java @@ -3,12 +3,13 @@ package madgik.exareme.worker.art.container; import madgik.exareme.common.art.ContainerSessionID; import madgik.exareme.common.art.PlanSessionID; 
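+// Extending java.rmi.Remote (below) is what makes these methods callable
+// through an RMI stub: RMI only dispatches methods that are declared in an
+// interface extending Remote.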
+import java.rmi.Remote; import java.rmi.RemoteException; /** * @author herald */ -public interface SessionBased { +public interface SessionBased extends Remote { void destroyContainerSession(ContainerSessionID containerSessionID, PlanSessionID sessionID) throws RemoteException; diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/dataTransfer/rest/RestDataTransferGateway.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/dataTransfer/rest/RestDataTransferGateway.java index a57d78b97d2d0d21b279fef11999ad03d57fd793..4dbbde1ba407687e86eea84e08e382a55684631f 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/dataTransfer/rest/RestDataTransferGateway.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/dataTransfer/rest/RestDataTransferGateway.java @@ -25,7 +25,7 @@ public class RestDataTransferGateway implements DataTransferGateway { private static final Logger log = Logger.getLogger(RestDataTransferGateway.class); // TODO(herald): this looks like a magic number! - private final int threads = 1000; + private final int threads = 20; private final int secondsToWait = 30; private String artRegistry = null; private ArtManager artManager = null; diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/rmi/RmiContainer.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/rmi/RmiContainer.java index a18876c21b509f65263a104a9823f090a0aacff5..0fa5a297bfca58bfe9a2718ccd2a829294f526c6 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/rmi/RmiContainer.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/rmi/RmiContainer.java @@ -138,10 +138,11 @@ public class RmiContainer extends RmiRemoteObject<ContainerProxy> implements Con log.debug("Create update deamon ..."); long lifeTime = AdpProperties.getArtProps().getLong("art.container.rmi.RmiContainer.lifetime"); - registryUpdateDeamon = RegistryUpdateDeamonFactory.createDeamon(this.createProxy(), (long) (0.75 * lifeTime)); - registryUpdateDeamon.startDeamon(); + if(lifeTime != 0) { + registryUpdateDeamon.startDeamon(); + } //TODO(DSH): check executor @@ -169,6 +170,7 @@ public class RmiContainer extends RmiRemoteObject<ContainerProxy> implements Con public ContainerJobResults execJobs(ContainerJobs jobs) throws RemoteException { ContainerJobResults results = new ContainerJobResults(); ContainerJobResult result = null; + log.debug("Executing " + jobs.getJobs().size() + " Jobs!"); for (ContainerJob job : jobs.getJobs()) { log.debug("Executing Job: " + job.getType().name() + " " + job.toString()); if (job.getType() diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/rmi/RmiContainerProxy.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/rmi/RmiContainerProxy.java index 6f5d5e5b3b5a0fd042c85366f3b87ba36561b680..9a0ce146b3e746be8413dab7bea14aeb02e3ea06 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/rmi/RmiContainerProxy.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/container/rmi/RmiContainerProxy.java @@ -44,7 +44,11 @@ public class RmiContainerProxy extends 
RmiObjectProxy<Container> implements Cont regEntityName.getDataTransferPort()); long lifeTime = AdpProperties.getArtProps().getLong("art.container.rmi.RmiContainer.lifetime"); - this.registerPolicy = PolicyFactory.generateTimeExpirationDeletePolicy(lifeTime); + if(lifeTime == 0) { + this.registerPolicy = PolicyFactory.generateNoExpirationPolicy(); + }else{ + this.registerPolicy = PolicyFactory.generateTimeExpirationDeletePolicy(lifeTime); + } } @Override diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/ExecEngineConstants.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/ExecEngineConstants.java index 7d02e31796f0948a23fb9fc5c901a75a3f375099..0f001db2d2949fd1e743cabac496c157b48c644e 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/ExecEngineConstants.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/ExecEngineConstants.java @@ -19,7 +19,7 @@ public class ExecEngineConstants { public static final String PRAGMA_INTER_CONTAINER_DATA_TRANSFER = "inter_container_data_transfer"; - public static final int THREADS_PER_INDEPENDENT_TASKS = 1024; + public static final int THREADS_PER_INDEPENDENT_TASKS = 20; public static double DATA_TRANSFER_MEM = 0.0; private ExecEngineConstants() { diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/containerStatusMgr/PeriodicContainersStatusCheck.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/containerStatusMgr/PeriodicContainersStatusCheck.java index dded4488263c7b96103233b36edafed4d9cdec31..2d1b1775e217a7fc95b715c89f67015f5dd74b22 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/containerStatusMgr/PeriodicContainersStatusCheck.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/containerStatusMgr/PeriodicContainersStatusCheck.java @@ -31,16 +31,14 @@ public class PeriodicContainersStatusCheck { } - public void addConainerToCheck(EntityName container) { + public void addContainerToCheck(EntityName container) { log.debug("Adding container to check: " + container); containersToCheck.add(container); } private class PeriodicCheck extends Thread { public void run() { - int i = 10; while (!planEventScheduler.getState().isTerminated()) { - i--; Set<EntityName> faultyContainers = new HashSet<>(); for (EntityName containerName : containersToCheck) { log.debug("Checking container: " + containerName); @@ -52,11 +50,12 @@ public class PeriodicContainersStatusCheck { log.error("Container connection error: " + e); faultyContainers.add(containerName); } - } if (!faultyContainers.isEmpty()) { if (planEventScheduler != null) { planEventScheduler.containersError(faultyContainers); + log.error("Reported container error and exiting!"); + return; } else { log.error("PlanEventScheduler should not be null!"); } diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/DynamicPlanManager.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/DynamicPlanManager.java index 73d1071c639089beba78a092efd0a22174f17921..fd2bf4c36f366b5a45f2a23ea46a4e725526ee26 100644 --- 
a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/DynamicPlanManager.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/DynamicPlanManager.java @@ -19,11 +19,14 @@ import org.apache.log4j.Logger; import java.rmi.RemoteException; import java.rmi.ServerException; +import java.util.Arrays; import java.util.HashMap; import java.util.LinkedList; import java.util.Map; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; /** * @author herald @@ -33,6 +36,7 @@ public class DynamicPlanManager implements PlanSessionManagerInterface { private final HashMap<PlanSessionID, PlanSessionReportID> elasticTreeSessions = new HashMap<>(); private EventProcessor eventProcessor = null; private long sessionCount = 0; + ReentrantLock sessionCountLock = new ReentrantLock(); private long containerSessionCount = 0; /* ROOT sessions */ private Map<PlanSessionID, PlanEventScheduler> schedulerMap = null; @@ -70,11 +74,13 @@ public class DynamicPlanManager implements PlanSessionManagerInterface { @Override public void createGlobalScheduler() throws RemoteException { + sessionCountLock.lock(); PlanSessionID sessionID = new PlanSessionID(sessionCount); PlanSessionReportID reportID = new PlanSessionReportID(sessionCount); - reportID.reportManagerProxy = executionEngine.getPlanSessionReportManagerProxy(reportID); sessionCount++; + sessionCountLock.unlock(); + reportID.reportManagerProxy = executionEngine.getPlanSessionReportManagerProxy(reportID); PlanEventScheduler eventScheduler = new PlanEventScheduler(sessionID, reportID, eventProcessor, this, resourceManager, registryProxy); @@ -85,11 +91,13 @@ public class DynamicPlanManager implements PlanSessionManagerInterface { @Override public PlanSessionID createNewSession() throws RemoteException { + sessionCountLock.lock(); PlanSessionID sessionID = new PlanSessionID(sessionCount); PlanSessionReportID reportID = new PlanSessionReportID(sessionCount); - reportID.reportManagerProxy = executionEngine.getPlanSessionReportManagerProxy(reportID); sessionCount++; + sessionCountLock.unlock(); + reportID.reportManagerProxy = executionEngine.getPlanSessionReportManagerProxy(reportID); PlanEventScheduler eventScheduler = new PlanEventScheduler(sessionID, reportID, eventProcessor, this, resourceManager, registryProxy); @@ -103,8 +111,7 @@ public class DynamicPlanManager implements PlanSessionManagerInterface { } @Override - public ContainerSessionID createContainerSession(PlanSessionID planSessionID) - throws RemoteException { + public ContainerSessionID createContainerSession(PlanSessionID planSessionID) { ContainerSessionID containerSessionID = new ContainerSessionID(containerSessionCount); containerSessionCount++; LinkedList<ContainerSessionID> containerSessionIDs = containerSessionMap.get(planSessionID); @@ -124,17 +131,18 @@ public class DynamicPlanManager implements PlanSessionManagerInterface { eventScheduler.closeSession(jobs); eventScheduler.queueIndependentEvents(jobs); Semaphore sem = new Semaphore(0); - if (eventScheduler.getState().isTerminated() == false) { + if (!eventScheduler.getState().isTerminated()) { eventScheduler.getState() .registerTerminationListener(new SemaphoreTerminationListener(sem)); - log.debug( "Waiting '" + forceSessionStopAfter_sec + "' seconds for session to stop ..."); boolean stopped = 
sem.tryAcquire(forceSessionStopAfter_sec, TimeUnit.SECONDS); - if (stopped == false) { - log.warn("Force stop!"); + if (!stopped) { + log.error("Force stop! SessionID: " + sessionID.getLongId() + "\n" + Arrays.toString(Thread.currentThread().getStackTrace()).concat("\n")); } } + + log.debug("Destroying session with ID: " + sessionID.getLongId()); PlanSessionReportID reportID = eventScheduler.getState().getPlanSessionReportID(); schedulerMap.remove(sessionID); containerSessionMap.remove(sessionID); diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/DynamicStatusManager.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/DynamicStatusManager.java index 9ed7157bf3d9c8ad91b269987056fb12c14bc295..89a6a5efa476be8f2ebd485e68ebb2dfc3be97d6 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/DynamicStatusManager.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/DynamicStatusManager.java @@ -7,6 +7,8 @@ import madgik.exareme.common.art.PlanSessionID; import madgik.exareme.worker.art.executionEngine.session.ActiveExecutionPlan; import madgik.exareme.worker.art.executionEngine.session.ConcreteOperatorStatus; import madgik.exareme.worker.art.executionEngine.statusMgr.PlanSessionStatusManagerInterface; +import madgik.exareme.worker.art.remote.RmiObjectProxy; +import org.apache.log4j.Logger; import java.rmi.RemoteException; import java.util.List; @@ -17,6 +19,7 @@ import java.util.concurrent.Semaphore; */ public class DynamicStatusManager extends EventSchedulerManipulator implements PlanSessionStatusManagerInterface { + private static final Logger log = Logger.getLogger(DynamicStatusManager.class); public DynamicStatusManager() { } @@ -24,6 +27,9 @@ public class DynamicStatusManager extends EventSchedulerManipulator @Override public boolean hasFinished(PlanSessionID sessionID) throws RemoteException { PlanEventScheduler eventScheduler = getSchedulerWithId(sessionID); + if(eventScheduler == null){ + log.error("Scheduler does not exist with SessionID: " + sessionID.getLongId() ); + } return eventScheduler.getState().getPlanSession().getPlanSessionStatus().hasFinished(); } diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/EventSchedulerManipulator.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/EventSchedulerManipulator.java index c9c35b31df30fb61828eecc342c15c170c0d2ed4..1e7dde84d1ada984167ee32ffd68a33fe17d0ce5 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/EventSchedulerManipulator.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/EventSchedulerManipulator.java @@ -5,16 +5,15 @@ package madgik.exareme.worker.art.executionEngine.dynamicExecutionEngine; import madgik.exareme.common.art.PlanSessionID; import madgik.exareme.worker.art.executionEngine.session.PlanSessionReportID; +import org.apache.log4j.Logger; -import java.util.Collection; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.Map; +import java.util.*; /** * @author 
heraldkllapi */ public class EventSchedulerManipulator { + private static final Logger log = Logger.getLogger(EventSchedulerManipulator.class); private PlanEventScheduler globalScheduler = null; private Map<PlanSessionID, PlanEventScheduler> activeSchedulers = null; private Map<PlanSessionReportID, PlanEventScheduler> activeSchedulersReportIdMap = null; diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/PlanEventScheduler.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/PlanEventScheduler.java index f3b014da5af1ad3dadb4b4a69d4cb80ca0bed14b..1c2945a5025a9f13153f8b2f6e2963cd307a3544 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/PlanEventScheduler.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/PlanEventScheduler.java @@ -301,7 +301,6 @@ public class PlanEventScheduler { public void execute(ExecutionPlan plan) throws RemoteException { lock.lock(); try { - log.debug("Plan submitted for execution ..."); EditableExecutionPlan newPlan = preprocessPlan(plan); validatePlan(newPlan); log.debug("PlanAfterPreprocessing: " + newPlan.toString()); diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/PlanEventSchedulerState.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/PlanEventSchedulerState.java index ebb0194890a4e210ee58f7cda1724d5653df2f71..69a70e32ff3df0537f8b333c66c1c5b888d5958b 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/PlanEventSchedulerState.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/PlanEventSchedulerState.java @@ -308,7 +308,7 @@ public class PlanEventSchedulerState { for (String containerName : plan.iterateContainers()) { try { if (!containerName.contains("any")) { - pcsc.addConainerToCheck(plan.getContainer(containerName)); + pcsc.addContainerToCheck(plan.getContainer(containerName)); } } catch (SemanticError semanticError) { semanticError.printStackTrace(); diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/closeContainerSession/CloseContainerSessionEventHandler.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/closeContainerSession/CloseContainerSessionEventHandler.java index f091b7c7f16a535bfdddebc51ff3230243c54151..aa4f95eadbea9dff2fefb4bc0b22db549e6f9697 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/closeContainerSession/CloseContainerSessionEventHandler.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/closeContainerSession/CloseContainerSessionEventHandler.java @@ -16,6 +16,7 @@ import org.apache.log4j.Logger; import java.rmi.RemoteException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.concurrent.ExecutorService; import 
java.util.concurrent.Executors; @@ -30,6 +31,7 @@ public class CloseContainerSessionEventHandler public static final CloseContainerSessionEventHandler instance = new CloseContainerSessionEventHandler(); private static final long serialVersionUID = 1L; + private static final Logger log = Logger.getLogger(CloseContainerSessionEventHandler.class); public CloseContainerSessionEventHandler() { } @@ -38,17 +40,19 @@ public class CloseContainerSessionEventHandler public void preProcess(CloseContainerSessionEvent event, PlanEventSchedulerState state) throws RemoteException { try { - ExecutorService service = - Executors.newFixedThreadPool(ExecEngineConstants.THREADS_PER_INDEPENDENT_TASKS); ArrayList<GetStatsAndCloseSession> workers = new ArrayList<GetStatsAndCloseSession>(); List<ContainerSession> sessions = state.getContSessions(event.containerSessionID); + ExecutorService service = Executors.newFixedThreadPool(sessions.size()); for (ContainerSession session : sessions) { GetStatsAndCloseSession w = new GetStatsAndCloseSession(session); workers.add(w); service.submit(w); } service.shutdown(); - service.awaitTermination(1, TimeUnit.DAYS); + if(!service.awaitTermination(2, TimeUnit.MINUTES)){ + log.error("Timeout when trying to fetch stats."); + throw new RemoteException("Timeout when trying to fetch stats." + Arrays.toString(Thread.currentThread().getStackTrace())); + } for (GetStatsAndCloseSession w : workers) { state.getStatistics().containerStats.add(w.stats.getStats()); } @@ -85,7 +89,7 @@ class GetStatsAndCloseSession extends Thread { @Override public void run() { try { - log.trace("Closing session: " + session.getSessionID().getLongId()); + log.debug("Closing session: " + session.getSessionID().getLongId() + " , " + this.toString()); ContainerJobs jobs = new ContainerJobs(); jobs.addJob(GetStatisticsJob.instance); results = session.execJobs(jobs); @@ -93,7 +97,9 @@ class GetStatsAndCloseSession extends Thread { session.closeSession(); } catch (RemoteException e) { exception = e; - log.error("Cannot close session", e); + log.error("Cannot close session " + session.getSessionID().getLongId(), e); + }finally{ + log.debug("Closed session: " + session.getSessionID().getLongId() + " , " + this.toString()); } } } diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/containerJobs/ContainerJobsEventListener.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/containerJobs/ContainerJobsEventListener.java index 2b2da22dcf9ba51c3700975492416be8884cc4d3..112dc65e5649516b6c2402be806769d4b9faa8c3 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/containerJobs/ContainerJobsEventListener.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/containerJobs/ContainerJobsEventListener.java @@ -20,7 +20,7 @@ public class ContainerJobsEventListener implements EventListener<ContainerJobsEv public void processed(ContainerJobsEvent event, RemoteException exception, EventProcessor processor) { if (exception != null) { - LogUtils.logException("Create", exception); + LogUtils.logException("Exception on Container Job Handler with sessionID: " + event.session.getSessionID().getLongId(), exception); PlanEventScheduler.engineInternalException(event, exception); } event.done(); diff --git 
a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/independent/IndependentEventsListener.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/independent/IndependentEventsListener.java index 10410a2ff3cea64c8e0b39440c3213831f74a98f..752cd7105c7873e16b712677970b33624ae926e0 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/independent/IndependentEventsListener.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/independent/IndependentEventsListener.java @@ -22,7 +22,7 @@ public class IndependentEventsListener implements EventListener<IndependentEvent public void processed(IndependentEvents event, RemoteException exception, EventProcessor processor) { if (exception != null) { - LogUtils.logException("Independent", exception); + LogUtils.logException("Exception on Independent Event with sessionID: " + event.state.getSessionID().getLongId(), exception); PlanEventScheduler.engineInternalException(event, exception); } event.done(); diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/planTermination/PlanTerminationEventHandler.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/planTermination/PlanTerminationEventHandler.java index 9f90c891cbc90fdf502e67fe32210a3698890300..98cc934ff893260e3630650c19d3db5d94e26b7d 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/planTermination/PlanTerminationEventHandler.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/planTermination/PlanTerminationEventHandler.java @@ -28,15 +28,13 @@ public class PlanTerminationEventHandler implements ExecEngineEventHandler<PlanT @Override public void preProcess(PlanTerminationEvent event, PlanEventSchedulerState state) throws RemoteException { - if (state.isTerminated() == false) { + if (!state.isTerminated()) { for (ContainerProxy proxy : state.getContainerProxies()) { try { - log.debug("closing session of container : " + proxy.getEntityName().getName()); proxy.destroySessions(state.getPlanSessionID()); - - } catch (RemoteException e) { - // state.addException(e); + } catch (Exception e) { + log.error("Cannot close the sessions for proxy: " + proxy, e); // throw new ServerException("Cannot close all sessions", e); } } @@ -54,7 +52,7 @@ public class PlanTerminationEventHandler implements ExecEngineEventHandler<PlanT state.terminationListeners.clear(); log.debug("Triggered " + listenerCount + " listeners!"); } - if (state.isTerminated() == false) { + if (!state.isTerminated()) { state.setTerminated(true); state.getPlanSession().getPlanSessionStatus().setFinished(new Date()); } diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/terminated/OperatorElasticTreeTerminatedEventHandler.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/terminated/OperatorElasticTreeTerminatedEventHandler.java 
index 5e9a29492d8ddd27e1d11438f3ad417115846516..6a0ed92abd1425d9810bd693d8f4710102899579 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/terminated/OperatorElasticTreeTerminatedEventHandler.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/terminated/OperatorElasticTreeTerminatedEventHandler.java @@ -100,6 +100,7 @@ public class OperatorElasticTreeTerminatedEventHandler state.eventScheduler.queueIndependentEvents(termJobs); IndependentEvents closeJobs = new IndependentEvents(state); + log.info("Operators finished (2), Closing session!: " + state.getPlanSessionID().getLongId()); state.eventScheduler.closeSession(closeJobs); state.eventScheduler.queueIndependentEvents(closeJobs); diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/terminated/OperatorGroupTerminatedEventHandler.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/terminated/OperatorGroupTerminatedEventHandler.java index e787735880f5911bd27a83e400a0012bf2ba9cde..0252b0041143bd2feb2abc323a8e839cb6321986 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/terminated/OperatorGroupTerminatedEventHandler.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/dynamicExecutionEngine/event/terminated/OperatorGroupTerminatedEventHandler.java @@ -71,7 +71,7 @@ public class OperatorGroupTerminatedEventHandler activeOperator.exitMessage = event.exitMessage; activeOperator.exitDate = new Date(); // Check if the group has terminated - if ((activeGroup.hasError == false) && group.hasTerminated) { + if (!activeGroup.hasError && group.hasTerminated) { log.trace("Operator Group Terminated: " + group.toString()); state.groupDependencySolver().setTerminated(group); // Close the container sessions @@ -90,6 +90,7 @@ public class OperatorGroupTerminatedEventHandler state.eventScheduler.queueIndependentEvents(termJobs); IndependentEvents closeJobs = new IndependentEvents(state); + log.info("Operators finished (1), Closing session! 
ID: " + state.getPlanSessionID().getLongId()); state.eventScheduler.closeSession(closeJobs); state.eventScheduler.queueIndependentEvents(closeJobs); } diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiExecutionEngine.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiExecutionEngine.java index 6999192ebfdad9371ea9436b456e769e372f493c..e902941d277df3b054ce549b8b991ee671aae8b5 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiExecutionEngine.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiExecutionEngine.java @@ -30,6 +30,7 @@ import madgik.exareme.worker.art.managementBean.ExecutionEngineManagement; import madgik.exareme.worker.art.managementBean.ManagementUtil; import madgik.exareme.worker.art.registry.ArtRegistryLocator; import madgik.exareme.worker.art.registry.ArtRegistryProxy; +import madgik.exareme.worker.art.registry.PolicyFactory; import madgik.exareme.worker.art.registry.updateDeamon.RegistryUpdateDeamon; import madgik.exareme.worker.art.registry.updateDeamon.RegistryUpdateDeamonFactory; import madgik.exareme.worker.art.remote.RmiRemoteObject; @@ -93,11 +94,11 @@ public class RmiExecutionEngine extends RmiRemoteObject<ExecutionEngineProxy> long lifeTime = AdpProperties.getArtProps() .getLong("art.executionEngine.rmi.RmiExecutionEngine.lifetime"); - registryUpdateDeamon = RegistryUpdateDeamonFactory.createDeamon(this.createProxy(), (long) (0.75 * lifeTime)); - - registryUpdateDeamon.startDeamon(); + if(lifeTime != 0) { + registryUpdateDeamon.startDeamon(); + } } @Override diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiExecutionEngineProxy.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiExecutionEngineProxy.java index ab3b0510a361d9e7ffede93fe607f34e8ed30581..61c387fda3989828c0cb2d4de6981ef218ff3728 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiExecutionEngineProxy.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiExecutionEngineProxy.java @@ -43,7 +43,11 @@ public class RmiExecutionEngineProxy extends RmiObjectProxy<ExecutionEngine> this.entityName = new EntityName(regEntryName, ip); long lifeTime = AdpProperties.getArtProps() .getLong("art.executionEngine.rmi.RmiExecutionEngine.lifetime"); - this.registerPolicy = PolicyFactory.generateTimeExpirationDeletePolicy(lifeTime); + if(lifeTime == 0) { + this.registerPolicy = PolicyFactory.generateNoExpirationPolicy(); + }else{ + this.registerPolicy = PolicyFactory.generateTimeExpirationDeletePolicy(lifeTime); + } } @Override diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiPlanSessionReportManagerProxy.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiPlanSessionReportManagerProxy.java index 4dbb7b873fa3d4db0897d358ec0d48d19ca4fe4b..d87c822f9699fe3d26e49e2aff4b04072afb14ba 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiPlanSessionReportManagerProxy.java +++ 
b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiPlanSessionReportManagerProxy.java @@ -52,11 +52,12 @@ public class RmiPlanSessionReportManagerProxy extends RmiObjectProxy<PlanSession // System.out.println("RPSMP before get"); log.debug("Reporting operatorSuccess: " + operatorID.operatorName); boolean success = false; - int max_retries = 1000; + int max_retries = 100; // System.out.println("RPSMP after get"); while (!success && max_retries > 0) { try { max_retries--; + PlanSessionReportManager rmo = super.getRemoteObject(); rmo.operatorSuccess(operatorID, exidCode, exitMessage, time, containerID, internalSessionID, terminateGroup); @@ -65,7 +66,7 @@ public class RmiPlanSessionReportManagerProxy extends RmiObjectProxy<PlanSession // System.out.println("RPSMP after success"); // System.out.println("DoneOperatorSuccess: " + operatorID.operatorName + " " + " " + exidCode); } catch (Exception e) { - System.out.println("RPSMP ERROR: " + e); + log.error("RPSMP ERROR: ", e); //this.operatorSuccess(operatorID, exidCode, exitMessage, time, containerID); } } diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiPlanSessionStatusManager.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiPlanSessionStatusManager.java index e1c600e948f6f301fa556d46f5a43d4508c04cf1..82eb38b129217994889e23064adc97ce64c01607 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiPlanSessionStatusManager.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/rmi/RmiPlanSessionStatusManager.java @@ -48,7 +48,6 @@ public class RmiPlanSessionStatusManager extends RmiRemoteObject<PlanSessionStat } public boolean hasFinished(PlanSessionID sessionID) throws RemoteException { - return statusManagerInterface.hasFinished(sessionID); } diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/session/ExecutionEngineSessionPlan.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/session/ExecutionEngineSessionPlan.java index f3b595766d0a897e529ee063b2b19f36c6326599..b5a99e1af13b1e09edfa6a1a853d44038f36daaf 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/session/ExecutionEngineSessionPlan.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/executionEngine/session/ExecutionEngineSessionPlan.java @@ -94,7 +94,7 @@ public class ExecutionEngineSessionPlan implements Serializable { public void close() throws RemoteException { lock.lock(); try { - if (isClosed == false) { + if (!isClosed) { engine.destroySession(sessionID); isClosed = true; } diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/registry/resourceStorage/MemoryResourceStorage.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/registry/resourceStorage/MemoryResourceStorage.java index ad52fe8a0404088e642811411f6624457c8502a0..853909a0225cf666ff7787429340aae0972d6de7 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/registry/resourceStorage/MemoryResourceStorage.java +++ 
b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/registry/resourceStorage/MemoryResourceStorage.java @@ -7,6 +7,7 @@ import madgik.exareme.common.art.entity.EntityName; import madgik.exareme.worker.art.registry.Registerable; import madgik.exareme.worker.art.registry.Registerable.Type; import madgik.exareme.worker.art.registry.RegistryResourceStorage; +import org.apache.log4j.Logger; import java.rmi.NoSuchObjectException; import java.rmi.RemoteException; @@ -21,6 +22,7 @@ import java.util.concurrent.Semaphore; * @since 1.0 */ public class MemoryResourceStorage implements RegistryResourceStorage { + private static Logger log = Logger.getLogger(MemoryResourceStorage.class); private Semaphore semaphore = null; private RegistryResourceStorageStatus registryResourceStorageStatus; @@ -52,12 +54,12 @@ public class MemoryResourceStorage implements RegistryResourceStorage { } l.add(r); - semaphore.release(); } catch (Exception e) { - semaphore.release(); throw new RemoteException( "Cannot store object: '" + r.getEntityName().getName() + "' at " + r.getEntityName() .getIP() + ":" + r.getEntityName().getPort(), e); + } finally { + semaphore.release(); } } @@ -66,7 +68,7 @@ public class MemoryResourceStorage implements RegistryResourceStorage { try { semaphore.acquire(); Registerable r = objectMap.get(epr.getName()); - semaphore.release(); + if (r == null) { throw new NoSuchObjectException( "Object was not found: '" + epr.getName() + "' at " + epr.getIP() + ":" + epr @@ -74,8 +76,9 @@ public class MemoryResourceStorage implements RegistryResourceStorage { } return r; } catch (Exception e) { - semaphore.release(); throw new RemoteException("Cannot retrieve object.", e); + } finally { + semaphore.release(); } } @@ -93,10 +96,11 @@ public class MemoryResourceStorage implements RegistryResourceStorage { registryResourceStorageStatus.decreaseStoredObjects(); List<Registerable> l = typeMap.get(r.getType()); l.remove(r); - semaphore.release(); + } catch (Exception e) { - semaphore.release(); throw new RemoteException("Cannot delete object.", e); + } finally { + semaphore.release(); } } @@ -106,9 +110,10 @@ public class MemoryResourceStorage implements RegistryResourceStorage { try { semaphore.acquire(); col = typeMap.get(type); - semaphore.release(); } catch (InterruptedException ex) { throw new ServerException("Cannot retrieve all objects of type: " + type, ex); + } finally { + semaphore.release(); } return col; } @@ -133,9 +138,10 @@ public class MemoryResourceStorage implements RegistryResourceStorage { while (it.hasNext()) { col.addAll(it.next()); } - semaphore.release(); } catch (InterruptedException ex) { throw new ServerException("Cannot retrieve all objects", ex); + } finally { + semaphore.release(); } return col; } diff --git a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/remote/RmiObjectProxy.java b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/remote/RmiObjectProxy.java index a0b674ce3d0cb714a0ef5406aeb72eb286fe0ac3..fe3ba54a99f38bc72dd0135c914bd7da3cbbaec5 100644 --- a/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/remote/RmiObjectProxy.java +++ b/Exareme-Docker/src/exareme/exareme-worker/src/main/java/madgik/exareme/worker/art/remote/RmiObjectProxy.java @@ -1,28 +1,14 @@ -/** +/* * Copyright MaDgIK Group 2010 - 2015. 
*/ package madgik.exareme.worker.art.remote; -import com.google.gson.Gson; import madgik.exareme.common.art.entity.EntityName; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpDelete; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.apache.http.util.EntityUtils; import org.apache.log4j.Logger; -import java.io.IOException; import java.rmi.AccessException; import java.rmi.RemoteException; -import java.rmi.ServerException; import java.rmi.registry.Registry; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.Objects; -import java.util.concurrent.Semaphore; /** * University of Athens / @@ -34,8 +20,8 @@ import java.util.concurrent.Semaphore; public abstract class RmiObjectProxy<T> implements ObjectProxy<T> { private static final long serialVersionUID = 1L; private static final Logger log = Logger.getLogger(RmiObjectProxy.class); - private EntityName regEntityName = null; - private String regEntryName = null; + private final EntityName regEntityName; + private final String regEntryName; private transient T remoteObject = null; private transient boolean isConnected = false; @@ -49,12 +35,14 @@ public abstract class RmiObjectProxy<T> implements ObjectProxy<T> { int tries = 0; while (true) { try { - log.trace("Connecting to (" + tries + ") " + + log.debug("Connecting to (" + tries + ") " + regEntityName.getIP() + ":" + regEntityName.getPort() + " ..."); tries++; Registry registry = RmiRegistryCache.getRegistry(regEntityName); remoteObject = (T) registry.lookup(regEntryName); isConnected = true; + log.debug("Connected to " + + regEntityName.getIP() + ":" + regEntityName.getPort() + " ..."); return remoteObject; } catch (Exception e) { log.error("Cannot connect to " + @@ -75,163 +63,19 @@ public abstract class RmiObjectProxy<T> implements ObjectProxy<T> { @Override public T getRemoteObject() throws RemoteException { - String name = null; - Iterator<Map.Entry<String, String>> entries; - Gson gson = new Gson(); - Semaphore semaphore = new Semaphore(1); - if (isConnected == false) { + if (!isConnected) { try { - connect(); //try to connect to remote object. If the connection is failing, maybe java is not running + connect(); // try to connect to remote object. 
If the connection is failing, maybe java is not running } catch (RemoteException exception) { - - //Get the Exareme's node name that is not responding - HashMap<String,String> names = null; - try { - semaphore.acquire(); - names = getNamesOfActiveNodes(); - for (Map.Entry<String, String> entry : names.entrySet()) { - log.debug("ActiveNodes from Consul key-value store: " + entry.getKey() + " = " + entry.getValue()); - } - } catch (IOException | InterruptedException e) { - e.printStackTrace(); - } - - if(names != null) { - entries = names.entrySet().iterator(); - while (entries.hasNext()) { - Map.Entry<String, String> entry = entries.next(); - if (Objects.equals(entry.getKey(), regEntityName.getIP())) { - name = entry.getValue(); - log.info("Found node with name: "+name+" that seems to be down.."); - - try { - String pathologyKey = searchConsul(System.getenv("DATA") + "/" + name + "?keys"); - String[] pathologyKeyArray = gson.fromJson(pathologyKey, String[].class); - for( String p: pathologyKeyArray) { - deleteFromConsul(p); //Delete every pathology for node with name $name - } - deleteFromConsul(System.getenv("EXAREME_ACTIVE_WORKERS_PATH") + "/" + name); - log.info("Worker node:[" + name + "," + regEntityName.getIP() + "]" + " removed from Consul key-value store"); - } catch (IOException E) { - throw new RemoteException("Can not contact Consul Key value Store"); - } - break; - } - } - } - throw new RemoteException("There was an error with worker "+ "["+ regEntityName.getIP() + "]."); - } - finally { - boolean acquired = semaphore.tryAcquire(); - if (!acquired) { - semaphore.release(); - } + throw new RemoteException("There was an error with worker " + "[" + regEntityName.getIP() + "]."); } } return remoteObject; } - private HashMap <String,String> getNamesOfActiveNodes() throws IOException { - Gson gson = new Gson(); - HashMap <String,String> map = new HashMap<>(); - String masterKey = searchConsul(System.getenv("EXAREME_MASTER_PATH")+"/?keys"); - String[] masterKeysArray = gson.fromJson(masterKey, String[].class); - - String masterName = masterKeysArray[0].replace(System.getenv("EXAREME_MASTER_PATH")+"/", ""); - String masterIP = searchConsul(System.getenv("EXAREME_MASTER_PATH")+"/"+masterName+"?raw"); - map.put(masterIP,masterName); - - String workersKey = searchConsul(System.getenv("EXAREME_ACTIVE_WORKERS_PATH")+"/?keys"); - if (workersKey == null) //No workers running - return map; //return master only - String[] workerKeysArray = gson.fromJson(workersKey, String[].class); - for(String worker: workerKeysArray){ - String workerName = worker.replace(System.getenv("EXAREME_ACTIVE_WORKERS_PATH")+"/", ""); - String workerIP = searchConsul(System.getenv("EXAREME_ACTIVE_WORKERS_PATH")+"/"+workerName+"?raw"); - map.put(workerIP,workerName); - } - return map; - } - - private String searchConsul(String query) throws IOException { - String result = null; - CloseableHttpClient httpclient = HttpClients.createDefault(); - String consulURL = System.getenv("CONSULURL"); - if (consulURL == null) throw new IOException("Consul url not set"); - if (!consulURL.startsWith("http://")) { - consulURL = "http://" + consulURL; - } - try { - HttpGet httpGet; - httpGet = new HttpGet(consulURL + "/v1/kv/" + query); - log.debug("Running: " + httpGet.getURI()); - CloseableHttpResponse response = null; - if (httpGet.toString().contains(System.getenv("EXAREME_MASTER_PATH")+"/") || httpGet.toString().contains(System.getenv("DATA")+"/")) { //if we can not contact : http://exareme-keystore:8500/v1/kv/master* or 
http://exareme-keystore:8500/v1/kv/datasets* - try { //then throw exception - response = httpclient.execute(httpGet); - if (response.getStatusLine().getStatusCode() != 200) { - throw new ServerException("Cannot contact consul", new Exception(EntityUtils.toString(response.getEntity()))); - } else { - result = EntityUtils.toString(response.getEntity()); - } - } finally { - response.close(); - } - } - if (httpGet.toString().contains(System.getenv("EXAREME_ACTIVE_WORKERS_PATH")+"/")) { //if we can not contact : http://exareme-keystore:8500/v1/kv/active_workers* - try { //then maybe there are no workers running - response = httpclient.execute(httpGet); - if (response.getStatusLine().getStatusCode() != 200) { - if (httpGet.toString().contains("?keys")) - log.debug("No workers running. Continue with master"); - } else { - result = EntityUtils.toString(response.getEntity()); - } - } finally { - response.close(); - } - } - } finally { - return result; - } - } - - private String deleteFromConsul(String query) throws IOException { - String result = null; - CloseableHttpClient httpclient = HttpClients.createDefault(); - String consulURL = System.getenv("CONSULURL"); - if (consulURL == null) throw new IOException("Consul url not set"); - if (!consulURL.startsWith("http://")) { - consulURL = "http://" + consulURL; - } - try { - HttpDelete httpDelete; - httpDelete = new HttpDelete(consulURL + "/v1/kv/" + query); - - //curl -X DELETE $CONSULURL/v1/kv/$DATASETS/$NODE_NAME - //curl -X DELETE $CONSULURL/v1/kv/$1/$NODE_NAME - log.debug("Running: " + httpDelete.getURI()); - CloseableHttpResponse response = null; - if (httpDelete.toString().contains(System.getenv("EXAREME_ACTIVE_WORKERS_PATH")+"/") || httpDelete.toString().contains(System.getenv("DATA")+"/")) { //if we can not contact : http://exareme-keystore:8500/v1/kv/master* or http://exareme-keystore:8500/v1/kv/datasets* - try { //then throw exception - response = httpclient.execute(httpDelete); - if (response.getStatusLine().getStatusCode() != 200) { - throw new ServerException("Cannot contact consul", new Exception(EntityUtils.toString(response.getEntity()))); - } else { - result = EntityUtils.toString(response.getEntity()); - } - } finally { - response.close(); - } - } - }finally { - return result; - } - } - @Override - public RetryPolicy getRetryPolicy() throws RemoteException { + public RetryPolicy getRetryPolicy() { return RetryPolicyFactory.defaultRetryPolicy(); } } diff --git a/Exareme-Docker/src/madisServer/MadisServer.py b/Exareme-Docker/src/madisServer/MadisServer.py index 538ec1d252f981aecb6955dde404e574f8a43929..599888d577fd8851923dfbb717dc2e20b49caa30 100644 --- a/Exareme-Docker/src/madisServer/MadisServer.py +++ b/Exareme-Docker/src/madisServer/MadisServer.py @@ -3,6 +3,7 @@ from tornado import gen from tornado.log import enable_pretty_logging from tornado.options import define, options import logging +import os PROCESSES_PER_CPU = 2 WEB_SERVER_PORT=8888 @@ -31,7 +32,10 @@ class MainHandler(BaseHandler): formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') hdlr.setFormatter(formatter) logger.addHandler(hdlr) - logger.setLevel(logging.DEBUG) + if os.environ['LOG_LEVEL'] == "DEBUG": + logger.setLevel(logging.DEBUG) + else: + logger.setLevel(logging.INFO) access_log = logging.getLogger("tornado.access") app_log = logging.getLogger("tornado.application") @@ -66,7 +70,7 @@ class MainHandler(BaseHandler): str_result=self.execQuery(dbFilename,query) except QueryExecutionException as e: #raise 
tornado.web.HTTPError(status_code=500,log_message="...the log message??") - self.logger.debug("(MadisServer::post) QueryExecutionException: {}".format(str(e))) + self.logger.error("(MadisServer::post) QueryExecutionException: {}".format(str(e))) #print "QueryExecutionException ->{}".format(str(e)) self.set_status(500) self.write(str(e)) diff --git a/Exareme-Docker/src/mip-algorithms/ANOVA/properties.json b/Exareme-Docker/src/mip-algorithms/ANOVA/properties.json index 3d7fd140bf7ebae54755143785efbe34214bb6e7..7dc79b5e1330d2c8936b6f5cff9dccd2d8a6d168 100644 --- a/Exareme-Docker/src/mip-algorithms/ANOVA/properties.json +++ b/Exareme-Docker/src/mip-algorithms/ANOVA/properties.json @@ -1,7 +1,7 @@ { "name": "ANOVA", "desc": "Two-way analysis of variance (ANOVA)", - "label": "ANOVA", + "label": "Two-way ANOVA", "type": "iterative", "parameters": [{ "name": "x", diff --git a/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/__init__.py b/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2088f28a8566edf770cb3475888ecb178967b106 --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/__init__.py @@ -0,0 +1,3 @@ +from .anova import Anova + +__all__ = ["Anova"] diff --git a/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/anova.py b/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/anova.py new file mode 100644 index 0000000000000000000000000000000000000000..3d06a6121a3fdc53bf357b21205d294f2f8cd139 --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/anova.py @@ -0,0 +1,327 @@ +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import itertools + +import numpy as np +import pandas as pd +import scipy.stats +from statsmodels.stats.libqsturng import psturng + +from mipframework import Algorithm +from mipframework import AlgorithmResult +from mipframework import TabularDataResource +from mipframework.highcharts import LineWithErrorbars +from utils.algorithm_utils import ExaremeError + + +class Anova(Algorithm): + def __init__(self, cli_args): + super(Anova, self).__init__(__file__, cli_args, intercept=False) + + def local_(self): + X = self.data.full + variable = self.parameters.y[0] + covariable = self.parameters.x[0] + var_label = self.metadata.label[variable] + covar_label = self.metadata.label[covariable] + covar_enums = self.metadata.enumerations[covariable] + + model = AdditiveAnovaModel(X, variable, covariable) + + self.push_and_add(model=model) + self.push_and_agree(var_label=var_label) + self.push_and_agree(covar_label=covar_label) + self.push_and_agree(covar_enums=covar_enums) + + def global_(self): + model = self.fetch("model") + var_label = self.fetch("var_label") + covar_label = self.fetch("covar_label") + covar_enums = self.fetch("covar_enums") + + if len(model.group_stats) < 2: + raise ExaremeError("Cannot perform Anova when there is only one level") + + res = model.get_anova_table() + + anova_table = TabularDataResource( + fields=["", "df", "sum sq", "mean sq", "F value", "Pr(>F)"], + data=[ + [ + covar_label, + res["df_explained"], + res["ss_explained"], + res["ms_explained"], + res["f_stat"], + res["p_value"], + ], + [ + "Residual", + res["df_residual"], + res["ss_residual"], + res["ms_residual"], + None, + None, + ], + ], + title="Anova Summary", + ) + + tukey_data = pairwise_tuckey(model, covar_enums) + tukey_hsd_table = TabularDataResource( + fields=list(tukey_data.columns), + data=list([list(row) for row in 
tukey_data.values]),
+            title="Tukey Honest Significant Differences",
+        )
+        tukey_dict = tukey_table_to_dict(tukey_data)
+
+        mean_plot = create_mean_plot(
+            model.group_stats, var_label, covar_label, covar_enums
+        )
+
+        self.result = AlgorithmResult(
+            raw_data={"anova_table": res, "tukey_table": tukey_dict},
+            tables=[anova_table, tukey_hsd_table],
+            highcharts=[mean_plot],
+        )
+
+
+class AdditiveAnovaModel(object):
+    def __init__(self, X=None, variable=None, covariable=None):
+        self._table = None
+        if X is not None and variable and covariable:
+            self.variable = variable
+            self.covariable = covariable
+            self.var_sq = variable + "_sq"
+            X[self.var_sq] = X[variable] ** 2
+
+            self.n_obs = X.shape[0]
+
+            self.overall_stats = self.get_overall_stats(X)
+
+            self.group_stats = self.get_group_stats(X)
+
+    def __add__(self, other):
+        result = AdditiveAnovaModel()
+
+        assert self.variable == other.variable, "variable names do not agree"
+        result.variable = self.variable
+
+        assert self.covariable == other.covariable, "covariable names do not agree"
+        result.covariable = self.covariable
+
+        result.n_obs = self.n_obs + other.n_obs
+
+        result.overall_stats = self.overall_stats + other.overall_stats
+
+        result.group_stats = self.group_stats.add(other.group_stats, fill_value=0)
+
+        return result
+
+    def get_overall_stats(self, X):
+        variable = self.variable
+        var_sq = self.var_sq
+        overall_stats = X[variable].agg(["count", "sum"])
+        overall_ssq = X[var_sq].sum()
+        overall_stats = overall_stats.append(
+            pd.Series(data=overall_ssq, index=["sum_sq"])
+        )
+        return overall_stats
+
+    def get_group_stats(self, X):
+        variable = self.variable
+        covar = self.covariable
+        var_sq = self.var_sq
+        group_stats = X[[variable, covar]].groupby(covar).agg(["count", "sum"])
+        group_stats.columns = ["count", "sum"]
+        group_ssq = X[[var_sq, covar]].groupby(covar).sum()
+        group_ssq.columns = ["sum_sq"]
+        group_stats = group_stats.join(group_ssq)
+        return group_stats
+
+    def get_df_explained(self):
+        return len(self.group_stats) - 1
+
+    def get_df_residual(self):
+        return self.n_obs - len(self.group_stats)
+
+    def get_ss_residual(self):
+        overall_sum_sq = self.overall_stats["sum_sq"]
+        group_sum = self.group_stats["sum"]
+        group_count = self.group_stats["count"]
+        return overall_sum_sq - sum(group_sum ** 2 / group_count)
+
+    def get_ss_total(self):
+        overall_sum_sq = self.overall_stats["sum_sq"]
+        overall_sum = self.overall_stats["sum"]
+        overall_count = self.overall_stats["count"]
+        return overall_sum_sq - (overall_sum ** 2 / overall_count)
+
+    def get_ss_explained(self):
+        group_sum = self.group_stats["sum"]
+        group_count = self.group_stats["count"]
+        return sum((self.overall_mean - group_sum / group_count) ** 2 * group_count)
+
+    def get_anova_table(self):
+        df_explained = self.get_df_explained()
+        df_residual = self.get_df_residual()
+        ss_explained = self.get_ss_explained()
+        ss_residual = self.get_ss_residual()
+        ms_explained = ss_explained / df_explained
+        ms_residual = ss_residual / df_residual
+        f_stat = ms_explained / ms_residual
+        p_value = 1 - scipy.stats.f.cdf(f_stat, df_explained, df_residual)
+        return dict(
+            df_explained=df_explained,
+            df_residual=df_residual,
+            ss_explained=ss_explained,
+            ss_residual=ss_residual,
+            ms_explained=ms_explained,
+            ms_residual=ms_residual,
+            f_stat=f_stat,
+            p_value=p_value,
+        )
+
+    @property
+    def table(self):
+        if self._table is None:
+            table = pd.DataFrame(
+                columns=["df", "sum_sq", "mean_sq", "F", "PR(>F)"],
+                index=[self.covariable, "Residual"],
+            )
+            df_explained = 
self.get_df_explained() + df_residual = self.get_df_residual() + ss_explained = self.get_ss_explained() + ss_residual = self.get_ss_residual() + ms_explained = ss_explained / df_explained + ms_residual = ss_residual / df_residual + f_stat = ms_explained / ms_residual + p_value = 1 - scipy.stats.f.cdf(f_stat, df_explained, df_residual) + table.loc[self.covariable] = { + "df": df_explained, + "sum_sq": ss_explained, + "mean_sq": ms_explained, + "F": f_stat, + "PR(>F)": p_value, + } + table.loc["Residual"] = { + "df": df_residual, + "sum_sq": ss_residual, + "mean_sq": ms_residual, + "F": None, + "PR(>F)": None, + } + self._table = table + return table + + return self._table + + @property + def overall_mean(self): + return self.overall_stats["sum"] / self.overall_stats["count"] + + def to_dict(self): # useful for debugging + dd = { + "variable": self.variable, + "covariable": self.covariable, + "n_obs": self.n_obs, + "overall_stats": self.overall_stats.tolist(), + "group_stats": self.group_stats.values.tolist(), + } + return dd + + +def create_mean_plot(group_stats, variable, covariable, categories): + title = "Means plot: {v} ~ {c}".format(v=variable, c=covariable) + means = group_stats["sum"] / group_stats["count"] + variances = group_stats["sum_sq"] / group_stats["count"] - means ** 2 + sample_vars = (group_stats["count"] - 1) / group_stats["count"] * variances + sample_stds = np.sqrt(sample_vars) + + categories = [c for c in categories if c in group_stats.index] + means = [means[cat] for cat in categories] + sample_stds = [sample_stds[cat] for cat in categories] + data = [[m - s, m, m + s] for m, s in zip(means, sample_stds)] + return LineWithErrorbars( + title=title, + data=data, + categories=categories, + xname=covariable, + yname="95% CI: " + variable, + ) + + +def pairwise_tuckey(aov, categories): + categories = np.array(aov.group_stats.index) + n_groups = len(categories) + gnobs = aov.group_stats["count"].to_numpy() + gmeans = (aov.group_stats["sum"] / aov.group_stats["count"]).to_numpy() + gvar = aov.table.at["Residual", "mean_sq"] / gnobs + g1, g2 = np.array(list(itertools.combinations(np.arange(n_groups), 2))).T + mn = gmeans[g1] - gmeans[g2] + se = np.sqrt(gvar[g1] + gvar[g2]) + tval = mn / se + df = aov.table.at["Residual", "df"] + pval = psturng(np.sqrt(2) * np.abs(tval), n_groups, df) + thsd = pd.DataFrame( + columns=[ + "A", + "B", + "mean(A)", + "mean(B)", + "diff", + "Std.Err.", + "t value", + "Pr(>|t|)", + ], + index=range(n_groups * (n_groups - 1) // 2), + ) + thsd["A"] = categories[g1] + thsd["B"] = categories[g2] + thsd["mean(A)"] = gmeans[g1] + thsd["mean(B)"] = gmeans[g2] + thsd["diff"] = mn + thsd["Std.Err."] = se + thsd["t value"] = tval + thsd["Pr(>|t|)"] = pval + return thsd + + +def tukey_table_to_dict(table): + tukey_dict = [] + for _, row in table.iterrows(): + tukey_row = dict() + tukey_row["groupA"] = row["A"] + tukey_row["groupB"] = row["B"] + tukey_row["meanA"] = row["mean(A)"] + tukey_row["meanB"] = row["mean(B)"] + tukey_row["diff"] = row["diff"] + tukey_row["se"] = row["Std.Err."] + tukey_row["t_stat"] = row["t value"] + tukey_row["p_tukey"] = row["Pr(>|t|)"] + tukey_dict.append(tukey_row) + return tukey_dict + + +if __name__ == "__main__": + import time + from mipframework import create_runner + + algorithm_args = [ + "-y", + "rightmprgprecentralgyrusmedialsegment", + "-x", + "alzheimerbroadcategory", + "-pathology", + "dementia", + "-dataset", + "ppmi,adni", + "-filter", + "", + ] + runner = create_runner(Anova, algorithm_args=algorithm_args, 
num_workers=3,)
+    start = time.time()
+    runner.run()
+    end = time.time()
diff --git a/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/generate_tescases_anova.py b/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/generate_tescases_anova.py
new file mode 100644
index 0000000000000000000000000000000000000000..a83c88ccc65f9909e5867e6c5e1987b7d8436a3c
--- /dev/null
+++ b/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/generate_tescases_anova.py
@@ -0,0 +1,84 @@
+# XXX this module runs on python 3.9 because we need pingouin to calculate
+# Tukey table.
+
+from pathlib import Path
+
+import numpy as np
+from pingouin import pairwise_tukey
+import statsmodels.api as sm
+from statsmodels.formula.api import ols
+from statsmodels.stats.libqsturng import psturng
+
+from mipframework.algorithmtest import AlgorithmTest
+from mipframework.exceptions import PrivacyError
+from utils.algorithm_utils import PRIVACY_MAGIC_NUMBER
+
+
+class AnovaTest(AlgorithmTest):
+    def get_expected(self, alg_input):
+        # Get data and remove missing values
+        x_names = alg_input[0]["value"]
+        y_names = alg_input[1]["value"]
+        datasets = alg_input[3]["value"]
+        variables = y_names
+        if x_names != "":
+            variables += "," + x_names
+        try:
+            data = self.get_data(variables, datasets)
+        except PrivacyError:
+            return None
+        data = data.dropna()
+        n_obs = len(data)
+        n_groups = len(set(data[x_names]))
+
+        # Reject when there are not enough observations (privacy threshold)
+        if n_obs <= PRIVACY_MAGIC_NUMBER:
+            return None
+        if data[x_names].unique().shape[0] < 2:
+            return None
+
+        # Anova
+        formula = "{y} ~ {x}".format(y=y_names, x=x_names)
+        lm = ols(formula, data=data).fit()
+        aov = sm.stats.anova_lm(lm)
+        result = aov.to_dict()
+
+        # Tukey test
+        tukey = pairwise_tukey(data=data, dv=y_names, between=x_names)
+        tukey_results = []
+        for _, row in tukey.iterrows():
+            tukey_result = dict()
+            tukey_result["groupA"] = row["A"]
+            tukey_result["groupB"] = row["B"]
+            tukey_result["meanA"] = row["mean(A)"]
+            tukey_result["meanB"] = row["mean(B)"]
+            tukey_result["diff"] = row["diff"]
+            tukey_result["se"] = row["se"]
+            tukey_result["t_stat"] = row["T"]
+            # computing pval because pingouin and statsmodels implementations
+            # of psturng do not agree
+            pval = psturng(
+                np.sqrt(2) * np.abs(row["T"]), n_groups, result["df"]["Residual"]
+            )
+            tukey_result["p_tukey"] = float(pval)
+            tukey_results.append(tukey_result)
+
+        expected_out = dict()
+        expected_out["df_residual"] = result["df"]["Residual"]
+        expected_out["df_explained"] = result["df"][x_names]
+        expected_out["ss_residual"] = result["sum_sq"]["Residual"]
+        expected_out["ss_explained"] = result["sum_sq"][x_names]
+        expected_out["ms_residual"] = result["mean_sq"]["Residual"]
+        expected_out["ms_explained"] = result["mean_sq"][x_names]
+        expected_out["p_value"] = result["PR(>F)"][x_names]
+        expected_out["f_stat"] = result["F"][x_names]
+        expected_out["tukey_test"] = tukey_results
+
+        return expected_out
+
+
+if __name__ == "__main__":
+    prop_path = Path(__file__).parent / "properties.json"
+    algtest = AnovaTest(prop_path.as_posix())
+    algtest.generate_test_cases(num_tests=100)
+    algtest.to_json("anova_expected.json")
diff --git a/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/global.py b/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/global.py
new file mode 100644
index 0000000000000000000000000000000000000000..e21a8753f1507f0dca3b1aeaa1c60d707b4b57f9
--- /dev/null
+++ b/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/global.py
@@ -0,0 +1,10 @@
+import sys
+from anova import Anova
+
+
+def main(args):
+    Anova(args[1:]).global_()
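# ---------------------------------------------------------------------------
# Editor's sketch (illustration, not part of the patch): the one-way ANOVA
# above is federated. Each local node pushes only additive sufficient
# statistics (per-group count, sum, sum of squares) and the global node adds
# them, which is exactly what AdditiveAnovaModel.__add__ does. A minimal
# numpy sketch of that idea; every name below is hypothetical, not
# mipframework's.
import numpy as np

def local_group_stats(y, groups):
    # Per-group [count, sum, sum of squares], computed on one node.
    return {g: np.array([(groups == g).sum(),
                         y[groups == g].sum(),
                         (y[groups == g] ** 2).sum()])
            for g in np.unique(groups)}

def merge_group_stats(a, b):
    # Global aggregation: sufficient statistics are additive across nodes.
    return {g: a.get(g, np.zeros(3)) + b.get(g, np.zeros(3))
            for g in set(a) | set(b)}

# From the merged table alone the global node recovers the ANOVA terms, e.g.
# ss_residual = overall_sum_sq - sum(sum_g ** 2 / n_g), matching
# get_ss_residual() above.
# ---------------------------------------------------------------------------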
+
+
+if __name__ == "__main__":
+    Anova(sys.argv[1:]).global_()
diff --git a/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/local.py b/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/local.py
new file mode 100644
index 0000000000000000000000000000000000000000..3bfefe58b8f468a08e9a763d11d313059b3ca4d9
--- /dev/null
+++ b/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/local.py
@@ -0,0 +1,10 @@
+import sys
+from anova import Anova
+
+
+def main(args):
+    Anova(args[1:]).local_()
+
+
+if __name__ == "__main__":
+    Anova(sys.argv[1:]).local_()
diff --git a/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/properties.json b/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/properties.json
new file mode 100644
index 0000000000000000000000000000000000000000..467517d209baf428a9e90fb8d1c458a9324e903f
--- /dev/null
+++ b/Exareme-Docker/src/mip-algorithms/ANOVA_ONEWAY/properties.json
@@ -0,0 +1,60 @@
+{
+    "name": "ANOVA_ONEWAY",
+    "desc": "One-way analysis of variance (ANOVA)",
+    "label": "One-way ANOVA",
+    "type": "python_local_global",
+    "parameters": [{
+        "name": "x",
+        "label": "x",
+        "desc": "A categorical variable on whose levels we perform the Anova.",
+        "type": "column",
+        "columnValuesSQLType": "",
+        "columnValuesIsCategorical": "true",
+        "columnValuesNumOfEnumerations": "",
+        "value": "ppmicategory",
+        "valueNotBlank": true,
+        "valueMultiple": false,
+        "valueType": "string"
+    }, {
+        "name": "y",
+        "label": "y",
+        "desc": "A numerical response variable.",
+        "type": "column",
+        "columnValuesSQLType": "real,integer",
+        "columnValuesIsCategorical": "false",
+        "columnValuesNumOfEnumerations": "",
+        "value": "lefthippocampus",
+        "valueNotBlank": true,
+        "valueMultiple": false,
+        "valueType": "string"
+    }, {
+        "name": "pathology",
+        "label": "pathology",
+        "desc": "The name of the pathology to which the dataset belongs.",
+        "type": "pathology",
+        "value": "dementia",
+        "valueNotBlank": true,
+        "valueMultiple": false,
+        "valueType": "string"
+    }, {
+        "name": "dataset",
+        "label": "dataset",
+        "desc": "The names of one or more datasets on which the algorithm will be executed.",
+        "type": "dataset",
+        "value": "ppmi",
+        "valueNotBlank": true,
+        "valueMultiple": true,
+        "valueType": "string"
+    }, {
+        "name": "filter",
+        "label": "filter",
+        "desc": "",
+        "type": "filter",
+        "value": "",
+        "valueNotBlank": false,
+        "valueMultiple": true,
+        "valueType": "string"
+    }
+    ]
+}
+
diff --git a/Exareme-Docker/src/mip-algorithms/CART/cart_lib.py b/Exareme-Docker/src/mip-algorithms/CART/cart_lib.py
index 85e596c010984477c6931d3e4d82e93f36eaaa0a..2254331227741e86afe4fa8d3e63400f4f141c07 100644
--- a/Exareme-Docker/src/mip-algorithms/CART/cart_lib.py
+++ b/Exareme-Docker/src/mip-algorithms/CART/cart_lib.py
@@ -106,7 +106,7 @@ class Node:
             "threshold" : None if self.gain == 0 or (self.right is None and self.left is None) else self.threshold,
             "criterion" :self.criterion,
             "gain" : self.gain,
-            #"samples" : "Less than " + str(PRIVACY_MAGIC_NUMBER) if self.samples <= PRIVACY_MAGIC_NUMBER else self.samples,
+            "samples" : "Less than " + str(PRIVACY_MAGIC_NUMBER) if self.samples <= PRIVACY_MAGIC_NUMBER else self.samples,
             #"samplesPerClass" : samplesPerClass,
             "classValue" : myclassValue,
             "class" : myclass,
diff --git a/Exareme-Docker/src/mip-algorithms/CROSS_VALIDATION_K_FOLD/properties.json b/Exareme-Docker/src/mip-algorithms/CROSS_VALIDATION_K_FOLD/properties.json
index 10d98d0abd2c06984c6bd3eb4d5012dffd625a79..d04b7628c76fd32f823b7c46db028272123d9e8a 100644
---
a/Exareme-Docker/src/mip-algorithms/CROSS_VALIDATION_K_FOLD/properties.json
+++ b/Exareme-Docker/src/mip-algorithms/CROSS_VALIDATION_K_FOLD/properties.json
@@ -51,7 +51,7 @@
         "desc": "Number of pieces the dataset will be split",
         "type": "other",
         "value": "3",
-        "valueMin": 1,
+        "valueMin": 2,
         "valueNotBlank": true,
         "valueMultiple": false,
         "valueType": "integer"
diff --git a/Exareme-Docker/src/mip-algorithms/LOGISTIC_REGRESSION/generate_testcases_logistic_regression.py b/Exareme-Docker/src/mip-algorithms/LOGISTIC_REGRESSION/generate_testcases_logistic_regression.py
index 325808245dfad51b21fa672d287716fbb1688a26..b940aab7946d41e46aea2cca0da06f2311f70141 100644
--- a/Exareme-Docker/src/mip-algorithms/LOGISTIC_REGRESSION/generate_testcases_logistic_regression.py
+++ b/Exareme-Docker/src/mip-algorithms/LOGISTIC_REGRESSION/generate_testcases_logistic_regression.py
@@ -3,7 +3,6 @@ WARNING: This one needs python3.6 because the 2.7 version of sklearn
 doesn't support Logistic Regression without regularization.
 """
 import json
-
 from pathlib import Path
 from random import shuffle
@@ -19,13 +18,9 @@ class LogisticRegressionTest(AlgorithmTest):
         x_names = alg_input[0]["value"]
         y_name = alg_input[1]["value"]
         variables = x_names + "," + y_name
-        data = self.get_data(variables)
+        datasets = alg_input[3]["value"]
+        data = self.get_data(variables, datasets=datasets)
         data = data.dropna()
-        n_obs = len(data)
-
-        # If n_obs < n_cols reject
-        if n_obs == 0 or data.shape[0] < data.shape[1]:
-            return None
 
         # Select two categories at random for y
         categories = list(set(data[y_name]))
@@ -35,35 +30,43 @@ class LogisticRegressionTest(AlgorithmTest):
         cat_0, cat_1 = categories[:2]
 
         # Build filter
-        filter_ = {
-            "condition": "OR",
-            "rules": [
-                {
-                    "id": y_name,
-                    "field": y_name,
-                    "type": "string",
-                    "input": "text",
-                    "operator": "equal",
-                    "value": cat_0,
-                },
-                {
-                    "id": y_name,
-                    "field": y_name,
-                    "type": "string",
-                    "input": "text",
-                    "operator": "equal",
-                    "value": cat_1,
-                },
-            ],
-            "valid": True,
-        }
-        alg_input[4]["value"] = json.dumps(filter_)
+        # filter_ = {
+        #     "condition": "OR",
+        #     "rules": [
+        #         {
+        #             "id": y_name,
+        #             "field": y_name,
+        #             "type": "string",
+        #             "input": "text",
+        #             "operator": "equal",
+        #             "value": cat_0,
+        #         },
+        #         {
+        #             "id": y_name,
+        #             "field": y_name,
+        #             "type": "string",
+        #             "input": "text",
+        #             "operator": "equal",
+        #             "value": cat_1,
+        #         },
+        #     ],
+        #     "valid": True,
+        # }
+        # alg_input[4]["value"] = json.dumps(filter_)
+        alg_input[4]["value"] = ""
+        alg_input[5]["value"] = cat_0
+        alg_input[6]["value"] = cat_1
 
         # Filter data according to above filter
         data = data[(data[y_name] == cat_0) | (data[y_name] == cat_1)]
         y = data[y_name]
         X = data[x_names.split(",")]
 
+        # If n_obs < n_cols reject
+        n_obs = len(data)
+        if n_obs == 0 or data.shape[0] < data.shape[1]:
+            return None
+
         # Reject when one class appears fewer times than the number of columns
         if any([len(y[y == item]) <= X.shape[1] for item in set(y)]):
             return None
diff --git a/Exareme-Docker/src/mip-algorithms/LOGISTIC_REGRESSION/logistic_regression.py b/Exareme-Docker/src/mip-algorithms/LOGISTIC_REGRESSION/logistic_regression.py
index 2c3e9e41b3bfe14260d1ae091d734eb010095887..2849eb34ffa979b521015b535c2f68a8ebfa7781 100644
--- a/Exareme-Docker/src/mip-algorithms/LOGISTIC_REGRESSION/logistic_regression.py
+++ b/Exareme-Docker/src/mip-algorithms/LOGISTIC_REGRESSION/logistic_regression.py
@@ -2,6 +2,7 @@ from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals
 
+import re
 from
collections import namedtuple import numpy as np @@ -19,6 +20,8 @@ from mipframework.constants import ( CONFIDENCE, ) from utils.algorithm_utils import ExaremeError +from utils.algorithm_utils import PrivacyError +from utils.algorithm_utils import PRIVACY_MAGIC_NUMBER class LogisticRegression(Algorithm): @@ -26,9 +29,12 @@ class LogisticRegression(Algorithm): super(LogisticRegression, self).__init__(__file__, cli_args) def local_init(self): - y, X = self.data.variables.iloc[:, 1], self.data.covariables + negl = self.parameters.negative_level + posl = self.parameters.positive_level + X, y = self.data.covariables, self.data.variables + X, y = keep_levels(X, y, positive_level=posl, negative_level=negl) - n_obs = len(y) # todo make these variables automatically available on global + n_obs = len(y) n_cols = len(X.columns) y_name = y.name x_names = list(X.columns) @@ -36,6 +42,8 @@ class LogisticRegression(Algorithm): n_y_pos = len(y[y == 1]) n_y_neg = len(y[y == 0]) + self.store(y=y) + self.store(X=X) self.push_and_add(n_obs=n_obs) self.push_and_add(n_y_pos=n_y_pos) self.push_and_add(n_y_neg=n_y_neg) @@ -63,7 +71,7 @@ class LogisticRegression(Algorithm): self.push(coeff=coeff) def local_step(self): - y, X = self.data.variables.iloc[:, 1], self.data.covariables + y, X = self.load("y"), self.load("X") coeff = self.fetch("coeff") grad, hess, ll = update_local_model_parameters(X, y, coeff) @@ -95,10 +103,10 @@ class LogisticRegression(Algorithm): self.push(coeff=coeff) def local_final(self): - y = self.data.variables.iloc[:, 1] + y, X = self.load("y"), self.load("X") thresholds = np.linspace(1.0, 0.0, num=2 ** 7 + 1) # odd otherwise no half_idx - yhats = np.array([self.predict(threshold=thr) for thr in thresholds]) + yhats = np.array([self.predict(x=X, threshold=thr) for thr in thresholds]) fn, fp, tn, tp = compute_classification_results(y, yhats) half_idx = np.where(thresholds == 0.5)[0][0] @@ -232,6 +240,28 @@ class LogisticRegression(Algorithm): ) +def keep_levels(X, y, positive_level, negative_level): + if len(y) > 0: + posl_pattern = r"[^\[]+\[{pl}\]".format(pl=re.escape(positive_level)) + posl_idx = [ + re.search(posl_pattern, colname) is not None for colname in y.columns + ].index(True) + negl_pattern = r"[^\[]+\[{nl}\]".format(nl=re.escape(negative_level)) + negl_idx = [ + re.search(negl_pattern, colname) is not None for colname in y.columns + ].index(True) + keep_rows = np.logical_or( + y.iloc[:, negl_idx] == 1.0, y.iloc[:, posl_idx] == 1.0 + ) + X, y = X[keep_rows], y[keep_rows] + y = y.iloc[:, posl_idx] + if y.shape[0] < PRIVACY_MAGIC_NUMBER: + raise PrivacyError("Query results in illegal number of datapoints.") + else: + raise PrivacyError("Query results in illegal number of datapoints.") + return X, y + + def init_model(n_cols, n_obs): ll = -2 * n_obs * np.log(2) coeff = np.zeros(n_cols) @@ -429,35 +459,16 @@ if __name__ == "__main__": "-dataset", "adni", "-filter", - """ - { - "condition": "OR", - "rules": [ - { - "id": "alzheimerbroadcategory", - "field": "alzheimerbroadcategory", - "type": "string", - "input": "text", - "operator": "equal", - "value": "AD" - }, - { - "id": "alzheimerbroadcategory", - "field": "alzheimerbroadcategory", - "type": "string", - "input": "text", - "operator": "equal", - "value": "CN" - } - ], - "valid": true - } - """, + "", "-formula", "", + "-positive_level", + "AD", + "-negative_level", + "CN", ] runner = create_runner( - LogisticRegression, num_workers=1, algorithm_args=algorithm_args, + LogisticRegression, num_workers=10, 
algorithm_args=algorithm_args, ) start = time.time() runner.run() diff --git a/Exareme-Docker/src/mip-algorithms/LOGISTIC_REGRESSION/properties.json b/Exareme-Docker/src/mip-algorithms/LOGISTIC_REGRESSION/properties.json index 1d0a4ba915085ce6a22b07e071a93deb4e18dc22..71ba2dab1ed752278f57199b94fc8bd8fbd1f676 100644 --- a/Exareme-Docker/src/mip-algorithms/LOGISTIC_REGRESSION/properties.json +++ b/Exareme-Docker/src/mip-algorithms/LOGISTIC_REGRESSION/properties.json @@ -9,8 +9,8 @@ "label": "x", "desc": "A list of variables from database. The variable should be Real, Integer. It cannot be empty", "type": "column", - "columnValuesSQLType": "real, integer, text", - "columnValuesIsCategorical": "", + "columnValuesSQLType": "real, integer", + "columnValuesIsCategorical": "false", "value": "leftaccumbensarea, leftacgganteriorcingulategyrus, leftainsanteriorinsula, rightaccumbensarea, rightacgganteriorcingulategyrus, rightainsanteriorinsula", "valueNotBlank": true, "valueMultiple": true, @@ -68,6 +68,28 @@ "valueNotBlank": false, "valueMultiple": false, "valueType": "string" + }, + { + "name": "positive_level", + "label": "Positive level", + "desc": "Level of the target variable to assign to the positive outcome.", + "type": "other", + "value": "", + "defaultValue": "", + "valueNotBlank": true, + "valueMultiple": false, + "valueType": "string" + }, + { + "name": "negative_level", + "label": "Negative level", + "desc": "Level of the target variable to assign to the negative outcome.", + "type": "other", + "value": "", + "defaultValue": "", + "valueNotBlank": true, + "valueMultiple": false, + "valueType": "string" } ] } diff --git a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/1/__init__.py b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/1/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/1/global.py b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/1/global.py new file mode 100644 index 0000000000000000000000000000000000000000..9bff44ede48ef3277192e9ca14eb4949eb0379c2 --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/1/global.py @@ -0,0 +1,10 @@ +import sys +from NAIVE_BAYES import NaiveBayes + + +def main(args): + NaiveBayes(args[1:]).global_init() + + +if __name__ == "__main__": + NaiveBayes(sys.argv[1:]).global_init() diff --git a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/1/local.py b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/1/local.py new file mode 100644 index 0000000000000000000000000000000000000000..efa3c8caeec002630f5a438509baeade45c40639 --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/1/local.py @@ -0,0 +1,10 @@ +import sys +from NAIVE_BAYES import NaiveBayes + + +def main(args): + NaiveBayes(args[1:]).local_init() + + +if __name__ == "__main__": + NaiveBayes(sys.argv[1:]).local_init() diff --git a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/2/__init__.py b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/2/global.py b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/2/global.py new file mode 100644 index 0000000000000000000000000000000000000000..03107ca0ed45f370d04f873f473e8a5c2511c9ab --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/2/global.py @@ -0,0 +1,10 @@ +import sys +from NAIVE_BAYES import NaiveBayes + + +def 
main(args): + NaiveBayes(args[1:]).global_final() + + +if __name__ == "__main__": + NaiveBayes(sys.argv[1:]).global_final() diff --git a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/2/local.py b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/2/local.py new file mode 100644 index 0000000000000000000000000000000000000000..dc7fa2a4b7754c22df229ef256964c0202b02a9f --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/2/local.py @@ -0,0 +1,10 @@ +import sys +from NAIVE_BAYES import NaiveBayes + + +def main(args): + NaiveBayes(args[1:]).local_final() + + +if __name__ == "__main__": + NaiveBayes(sys.argv[1:]).local_final() diff --git a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/__init__.py b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1561642fdcee33fe5c1fe82001731a5b7cd5e0a0 --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/__init__.py @@ -0,0 +1,3 @@ +from .naive_bayes import NaiveBayes + +__all__ = ["NaiveBayes"] diff --git a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/naive_bayes.py b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/naive_bayes.py new file mode 100644 index 0000000000000000000000000000000000000000..31e81b87540da1ff57f5f5392bd92ceb8fe8c283 --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/naive_bayes.py @@ -0,0 +1,524 @@ +from __future__ import print_function +from __future__ import division +from __future__ import unicode_literals + +from collections import Counter +import warnings + +import numpy as np +from sklearn import metrics +from sklearn.naive_bayes import GaussianNB +from sklearn.naive_bayes import BaseDiscreteNB + +from mipframework import Algorithm +from mipframework import AlgorithmResult +from mipframework import TabularDataResource +from mipframework.funclib.crossvalidation import kfold_split_design_matrices +from mipframework.funclib.crossvalidation import AdditiveMulticlassROCCurve +from mipframework.funclib.crossvalidation import AdditiveMulticlassClassificationReport +from mipframework.highcharts.user_defined import MultilabelConfisionMatrix +from mipframework.highcharts.user_defined import MulticlassROCCurve + + +class NaiveBayes(Algorithm): + def __init__(self, cli_args): + super(NaiveBayes, self).__init__(__file__, cli_args, intercept=False) + + def local_init(self): + data = self.data.full + y, X = data[self.parameters.y], data[self.parameters.x] + categ_names = [k for k, v in self.metadata.is_categorical.items() if v == 1] + categ_names.remove(self.parameters.y[0]) + numer_names = [k for k, v in self.metadata.is_categorical.items() if v == 0] + X_cat = np.array(X[categ_names]) if categ_names else None + X_num = np.array(X[numer_names]) if numer_names else None + if X_num is not None and X_cat is not None: + xtypes = "both" + elif X_num is not None: + xtypes = "numerical" + elif X_cat is not None: + xtypes = "categorical" + y = np.array(y) + n_splits = int(self.parameters.k) + + matrices_to_split = [y] + if X_num is not None: + matrices_to_split.append(X_num) + if X_cat is not None: + matrices_to_split.append(X_cat) + train_sets, test_sets = kfold_split_design_matrices( + n_splits, *matrices_to_split + ) + models = [ + MixedAdditiveNB(float(self.parameters.alpha)) for _ in range(n_splits) + ] + if xtypes == "numerical": + [m.fit(yt, X_num=Xt) for m, (yt, Xt) in zip(models, train_sets)] + elif xtypes == "categorical": + [m.fit(yt, X_cat=Xt) for m, (yt, Xt) in zip(models, train_sets)] + elif xtypes == "both": + [m.fit(yt, Xnt, 
Xct) for m, (yt, Xnt, Xct) in zip(models, train_sets)] + + self.store(train_sets=train_sets) + self.store(test_sets=test_sets) + self.store(y=y) + self.store(xtypes=xtypes) + self.push_and_agree(n_splits=n_splits) + for k in range(n_splits): + self.push_and_add(**{"model" + str(k): models[k]}) + + def global_init(self): + n_splits = self.fetch("n_splits") + models = [self.fetch("model" + str(k)) for k in range(n_splits)] + if models[0].gnb: + classes = models[0].gnb.classes_ + else: + classes = models[0].cnb.classes_ + + self.store(classes=classes) + for k in range(n_splits): + self.push_and_add(**{"model" + str(k): models[k]}) + + def local_final(self): + n_splits = int(self.parameters.k) + y = self.load("y") + n_obs = len(y) + test_sets = self.load("test_sets") + xtypes = self.load("xtypes") + models = [self.fetch("model" + str(k)) for k in range(n_splits)] + classes = models[0].classes_ + n_classes = len(classes) + + if xtypes == "numerical": + y_preds = [m.predict(X_num=Xt) for m, (_, Xt) in zip(models, test_sets)] + elif xtypes == "categorical": + y_preds = [m.predict(X_cat=Xt) for m, (_, Xt) in zip(models, test_sets)] + else: + y_preds = [ + m.predict(Xnt, Xct) for m, (_, Xnt, Xct) in zip(models, test_sets) + ] + y_pred = np.array(y).flatten() + idx = 0 + for yp in y_preds: + y_pred[idx : idx + len(yp)] = yp + idx += len(yp) + + if xtypes == "numerical": + y_pred_proba_per_class_kfold = [ + m.predict_proba(X_num=Xt) for m, (_, Xt) in zip(models, test_sets) + ] + elif xtypes == "categorical": + y_pred_proba_per_class_kfold = [ + m.predict_proba(X_cat=Xt) for m, (_, Xt) in zip(models, test_sets) + ] + else: + y_pred_proba_per_class_kfold = [ + m.predict_proba(Xnt, Xct) for m, (_, Xnt, Xct) in zip(models, test_sets) + ] + y_pred_proba_per_class = np.empty((n_obs, n_classes)) + idx = 0 + for yp in y_pred_proba_per_class_kfold: + y_pred_proba_per_class[idx : idx + len(yp)] = yp + idx += len(yp) + + confusion_matrix = metrics.confusion_matrix(y, y_pred) + accuracy = metrics.accuracy_score(y, y_pred) + + roc_curve = AdditiveMulticlassROCCurve( + y_true=y, y_pred_proba_per_class=y_pred_proba_per_class, classes=classes + ) + + classification_report = AdditiveMulticlassClassificationReport( + y_true=y, y_pred=y_pred, classes=classes + ) + + self.push_and_add(n_obs=n_obs) + self.push_and_add(confusion_matrix=confusion_matrix) + self.push_and_add(accuracy=Mediant(accuracy * n_obs, n_obs)) + self.push_and_add(roc_curve=roc_curve) + self.push_and_add(classification_report=classification_report) + + def global_final(self): + classes = self.load("classes") + confusion_matrix = self.fetch("confusion_matrix") + + accuracy = self.fetch("accuracy").get_value() + n_obs = self.fetch("n_obs") + accuracy_ci = 1.96 * np.sqrt((accuracy * (1 - accuracy)) / n_obs) + + roc_curves = self.fetch("roc_curve").get_curves() + ( + precision, + recall, + specificity, + f_score, + precision_avgs, + recall_avgs, + specificity_avgs, + f_score_avgs, + ) = self.fetch("classification_report").get_values() + precision = precision.tolist() + recall = recall.tolist() + specificity = specificity.tolist() + f_score = f_score.tolist() + + cm_chart = MultilabelConfisionMatrix( + "Confusion Matrix", confusion_matrix, classes.tolist() + ) + + aucs = [] + ginis = [] + for fpr, tpr in roc_curves: + auc = np.trapz(tpr, fpr) + gini = 2 * auc - 1 + aucs.append(auc) + ginis.append(gini) + + roc_chart = MulticlassROCCurve("ROC", roc_curves, classes) + + accuracy_report = TabularDataResource( + fields=["Statistic", "Value"], + data=list( + 
zip( + *[ + ["Accuracy", "Lower c.i.", "Upper c.i."], + [accuracy, accuracy - accuracy_ci, accuracy + accuracy_ci], + ] + ) + ), + title="Overall classification statistics", + ) + + clf_report = TabularDataResource( + fields=["", "Precision", "Recall", "Specificity", "F score"], + data=list( + zip( + *[ + classes.tolist() + ["micro avg", "macro avg", "weighted avg"], + precision + precision_avgs, + recall + recall_avgs, + specificity + specificity_avgs, + f_score + f_score_avgs, + ] + ) + ), + title="Classification Report", + ) + + roc_report = TabularDataResource( + fields=["Class", "AUC", "Gini coefficient"], + data=list(zip(*[classes.tolist(), aucs, ginis])), + title="ROC report", + ) + + self.result = AlgorithmResult( + raw_data={ + "accuracy": accuracy, + "confusion_matrix": confusion_matrix.tolist(), + "roc_curve": roc_curves, + "classes": classes.tolist(), + "precision": precision, + "recall": recall, + "f_score": f_score, + }, + tables=[clf_report, roc_report, accuracy_report], + highcharts=[cm_chart, roc_chart], + ) + + +class MixedAdditiveNB(object): + def __init__(self, alpha=1.0): + self.alpha = alpha + self.gnb = None + self.cnb = None + + @property + def classes_(self): + if self.gnb: + return self.gnb.classes_ + elif self.cnb: + return self.cnb.classes_ + else: + raise ValueError("model hasn't been trained yet") + + def fit(self, y, X_num=None, X_cat=None): + if X_num is not None: + self.gnb = AdditiveGaussianNB() + self.gnb.fit(X_num, y) + if X_cat is not None: + self.cnb = AdditiveCategoricalNB(alpha=self.alpha) + self.cnb.fit(X_cat, y) + + def predict(self, X_num=None, X_cat=None): + if X_num is not None and X_cat is not None: + jll = ( + self.gnb.predict_log_proba(X_num) + + self.cnb.predict_log_proba(X_cat) + - self.gnb.class_log_prior_ + ) + return np.array([self.gnb.classes_[i] for i in jll.argmax(axis=1)]) + elif X_num is not None: + return self.gnb.predict(X_num) + elif X_cat is not None: + return self.cnb.predict(X_cat) + + def predict_proba(self, X_num=None, X_cat=None): + if X_num is not None and X_cat is not None: + probs_num = self.gnb.predict_proba(X_num) + probs_cat = self.cnb.predict_proba(X_cat) + normalizations = (probs_num * probs_cat).sum(axis=1)[:, np.newaxis] + return probs_num * probs_cat / normalizations + elif X_num is not None: + return self.gnb.predict_proba(X_num) + elif X_cat is not None: + return self.cnb.predict_proba(X_cat) + + def __add__(self, other): + result = MixedAdditiveNB() + if self.gnb and other.gnb: + result.gnb = self.gnb + other.gnb + if self.cnb and other.cnb: + result.alpha = self.alpha + result.cnb = self.cnb + other.cnb + return result + + # def __repr__(self): + # return repr({"gnb": self.gnb.__dict__, "cnb": self.cnb.__dict__}) + + +class AdditiveCategoricalNB(BaseDiscreteNB): + def __init__(self, alpha=1.0): + self.alpha = alpha + self._class_log_prior_ = np.array([]) + self._feature_log_prob_ = [] + + def fit(self, X, y): + self.n_obs_, self.n_features_ = X.shape + self.classes_, self.class_count_ = np.unique(y, return_counts=True) + self.n_classes_ = len(self.classes_) + self.categories_, self.category_per_feat_count_ = list( + zip(*[np.unique(col, return_counts=True) for col in X.T]) + ) + self.n_categories_ = np.array([len(c) for c in self.categories_]) + self.category_count_ = [ + np.empty((self.n_classes_, self.n_categories_[f])) + for f in xrange(self.n_features_) + ] + for ci, c in enumerate(self.classes_): + X_where_x = X[np.where(y == c)[0]] + for fi, feature in enumerate(X_where_x.T): + counter = 
Counter(feature) + self.category_count_[fi][ci, :] = np.array( + [counter[cat] for cat in self.categories_[fi]] + ) + + def __add__(self, other): + def sum_elementwise(x, y): + return [xi + yi for xi, yi in zip(x, y)] + + if self.alpha != other.alpha: + raise ValueError("alphas do not agree") + result = AdditiveCategoricalNB(alpha=self.alpha) + + result.n_obs_ = self.n_obs_ + other.n_obs_ + + if self.n_features_ != other.n_features_: + raise ValueError("n_features_ do not agree") + result.n_features_ = self.n_features_ + + if (self.classes_ != other.classes_).all(): + raise ValueError("classes_ do not agree") + result.classes_ = self.classes_ + + result.class_count_ = self.class_count_ + other.class_count_ + + if self.n_classes_ != other.n_classes_: + raise ValueError("n_classes_ do not agree") + result.n_classes_ = self.n_classes_ + + result.category_per_feat_count_ = sum_elementwise( + self.category_per_feat_count_, other.category_per_feat_count_ + ) + + if not all( + [(c1 == c2).all() for c1, c2 in zip(self.categories_, other.categories_)] + ): + raise ValueError("categories_ do not agree") + result.categories_ = self.categories_ + + result.n_categories_ = sum_elementwise(self.n_categories_, other.n_categories_) + + result.category_count_ = sum_elementwise( + self.category_count_, other.category_count_ + ) + + return result + + @property + def class_log_prior_(self): + if not self._class_log_prior_.any(): + with warnings.catch_warnings(): + # silence the warning when count is 0 because class was not yet + # observed + warnings.simplefilter("ignore", RuntimeWarning) + log_class_count = np.log(self.class_count_) + self._class_log_prior_ = log_class_count - np.log(self.class_count_.sum()) + return self._class_log_prior_ + + @property + def feature_log_prob_(self): + if not self._feature_log_prob_: + feature_log_prob = [] + for i in range(self.n_features_): + smoothed_cat_count = self.category_count_[i] + self.alpha + smoothed_class_count = smoothed_cat_count.sum(axis=1) + feature_log_prob.append( + np.log(smoothed_cat_count) + - np.log(smoothed_class_count.reshape(-1, 1)) + ) + self._feature_log_prob_ = feature_log_prob + return self._feature_log_prob_ + + def _joint_log_likelihood(self, X): + if not X.shape[1] == self.n_features_: + raise ValueError( + "Expected input with %d features, got %d instead" + % (self.n_features_, X.shape[1]) + ) + jll = np.zeros((X.shape[0], self.class_count_.shape[0])) + for i in range(self.n_features_): + categories = X[:, i] + indices = [np.where(self.categories_[i] == cat)[0][0] for cat in categories] + jll += self.feature_log_prob_[i][:, indices].T + total_ll = jll + self.class_log_prior_ + return total_ll + + def __eq__(self, other): + raise NotImplementedError + + +class AdditiveGaussianNB(GaussianNB): + def __init__(self, priors=None, var_smoothing=1e-9): + self._class_log_prior_ = np.array([]) + super(AdditiveGaussianNB, self).__init__(priors, var_smoothing) + + def fit(self, X, y): + self.n_obs_, self.n_feats_ = X.shape + super(AdditiveGaussianNB, self).fit(X, y) + + @property + def class_log_prior_(self): + if not self._class_log_prior_.any(): + with warnings.catch_warnings(): + # silence the warning when count is 0 because class was not yet + # observed + warnings.simplefilter("ignore", RuntimeWarning) + log_class_count = np.log(self.class_count_) + self._class_log_prior_ = log_class_count - np.log(self.class_count_.sum()) + return self._class_log_prior_ + + def __add__(self, other): + if self.var_smoothing != other.var_smoothing: + raise 
ValueError("var_smoothing values do not agree") + if self.priors != other.priors: + raise ValueError("priors do not agree") + if (self.classes_ != other.classes_).all(): + raise ValueError("classes_ do not agree") + + class_count_1 = self.class_count_[:, np.newaxis] + class_count_2 = other.class_count_[:, np.newaxis] + n_obs_total = self.n_obs_ + other.n_obs_ + class_count_total = class_count_1 + class_count_2 + + theta_total = ( + class_count_1 * self.theta_ + class_count_2 * other.theta_ + ) / class_count_total + + self.sigma_[:, :] -= self.epsilon_ + other.sigma_[:, :] -= other.epsilon_ + epsilon_total = max(self.epsilon_, other.epsilon_) + ssd_1 = class_count_1 * self.sigma_ + ssd_2 = class_count_2 * other.sigma_ + total_ssd = ( + ssd_1 + + ssd_2 + + (class_count_1 * class_count_2 / class_count_total) + * (self.theta_ - other.theta_) ** 2 + ) + sigma_total = total_ssd / class_count_total + sigma_total += epsilon_total + + result = AdditiveGaussianNB(self.priors, self.var_smoothing) + result.n_obs_ = n_obs_total + result.classes_ = self.classes_ + result.sigma_ = sigma_total + result.theta_ = theta_total + result.epsilon_ = epsilon_total + result.class_count_ = class_count_total.flatten() + result.class_prior_ = result.class_count_ / n_obs_total + return result + + def __eq__(self, other): + if self.var_smoothing != other.var_smoothing: + return False + if self.priors != other.priors: + return False + if (self.classes_ != other.classes_).all(): + return False + if not np.isclose(self.theta_, other.theta_).all(): + return False + if not np.isclose(self.sigma_, other.sigma_).all(): + return self.sigma_, other.sigma_ + if (self.class_count_ != other.class_count_).all(): + return False + if (self.class_prior_ != other.class_prior_).all(): + return False + if self.n_obs_ != other.n_obs_: + return False + if self.n_feats_ != other.n_feats_: + return False + return True + + +class Mediant(object): + def __init__(self, num, den): + self.num = num + self.den = den + + def __add__(self, other): + return Mediant(self.num + other.num, self.den + other.den) + + def __repr__(self): + return str(self.get_value()) + + def get_value(self): + return float(self.num) / float(self.den) + + +if __name__ == "__main__": + import time + from mipframework import create_runner + + algorithm_args = [ + "-x", + # "lefthippocampus,righthippocampus,leftaccumbensarea", + # "gender,apoe4,agegroup", + "lefthippocampus,righthippocampus,leftaccumbensarea,gender,apoe4,agegroup", + "-y", + "alzheimerbroadcategory", + "-alpha", + "1", + "-k", + "10", + "-pathology", + "dementia", + "-dataset", + "adni", + "-filter", + "", + ] + runner = create_runner(NaiveBayes, algorithm_args=algorithm_args, num_workers=3,) + start = time.time() + runner.run() + end = time.time() + # print("Completed in ", end - start) diff --git a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/properties.json b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/properties.json new file mode 100644 index 0000000000000000000000000000000000000000..334684d05e376e7285cdc29561b173a03b4aba00 --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES/properties.json @@ -0,0 +1,80 @@ +{ + "name": "NAIVE_BAYES", + "desc": "Naive Bayes classifier for numerical data (Gaussian NB) and nominal data (Categorical NB).", + "label": "Naive Bayes classifier", + "type": "python_multiple_local_global", + "status": "enabled", + "parameters": [ + { + "name": "x", + "label": "x", + "desc": "Independent variables: A list of variables from database.", + "type": "column", + 
"columnValuesSQLType": "", + "columnValuesIsCategorical": "", + "columnValuesNumOfEnumerations": "", + "value": "righthippocampus,lefthippocampus", + "valueNotBlank": true, + "valueMultiple": true, + "valueType": "string" + }, { + "name": "y", + "label": "y", + "desc": "Dependent variable: A categorical variable form database.", + "type": "column", + "columnValuesSQLType": "", + "columnValuesIsCategorical": "true", + "columnValuesNumOfEnumerations": "", + "value": "alzheimerbroadcategory", + "valueNotBlank": true, + "valueMultiple": false, + "valueType": "string" + }, { + "name": "alpha", + "label": "alpha", + "desc": "Additive smoothing parameter (0 for no smoothing)", + "type": "other", + "value": "0.1", + "valueNotBlank": true, + "valueMultiple": false, + "valueType": "real" + }, { + "name": "k", + "label": "number of batches", + "desc": "The number of batches that will be used in k-fold crossvalidation.", + "type": "other", + "value": "10", + "valueNotBlank": true, + "valueMultiple": false, + "valueType": "integer", + "valueMin": 2 + }, { + "name": "pathology", + "label": "pathology", + "desc": "The name of the pathology in which the dataset belongs to.", + "type": "pathology", + "value": "dementia", + "valueNotBlank": true, + "valueMultiple": false, + "valueType": "string" + }, { + "name": "dataset", + "label": "dataset", + "desc": "The names of one or more datasets, in which the algorithm will be executed.", + "type": "dataset", + "value": "edsd", + "valueNotBlank": true, + "valueMultiple": true, + "valueType": "string" + }, { + "name": "filter", + "label": "filter", + "desc": "", + "type": "filter", + "value": "", + "valueNotBlank": false, + "valueMultiple": true, + "valueType": "string" + } + ] +} diff --git a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TESTING/global.template.sql b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TESTING/global.template.sql index 0947a43baef8fbf2fd640b62d4ef2311bebebda5..d460a892a22d1d4f2abafbcbf91f619f09db63cf 100644 --- a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TESTING/global.template.sql +++ b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TESTING/global.template.sql @@ -7,8 +7,24 @@ select iterationNumber, actualclass, predictedclass, sum(val) as val from %{input_global_tbl} group by actualclass,predictedclass; +--drop table if exists defaultDB.lala; +--create table defaultDB.lala as select * from global_oneconfusionmatrix; + var 'jsonResult' from select '{ "type": "application/json", "data": ' || componentresult || '}' from ( select tabletojson(actualclass,predictedclass,val, "actualclass,predictedclass,val",0) as componentresult from global_oneconfusionmatrix ); -select '{"result": [' || '%{jsonResult}' || ']}'; +--var 'heatmap' from select highchartheatmap(actualclass,predictedclass,val,"confusion matrix", "actual values", "predicted values") from global_oneconfusionmatrix; + +var 'heatmap' from select * from (highchartheatmap title:Confusion_Matrix, xtitle:Actual_Values, ytitle:Predicted_Values select actualclass,predictedclass,val from global_oneconfusionmatrix); + +drop table if exists confusionmatrixstats; +create temp table confusionmatrixstats as +rconfusionmatrixtable select predictedclass,actualclass,val from global_oneconfusionmatrix; + + +var 'a' from select statsval from confusionmatrixstats where statscolname = 'ResultOverall'; +var 'b' from select statsval from confusionmatrixstats where statscolname = 'ResultClassNames'; + + +select '{"result": [' || '%{jsonResult}' ||','||'%{heatmap}' ||','||'%{a}'||',' || '%{b}' ||']}'; 
diff --git a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TRAINING/global.template.sql b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TRAINING/global.template.sql index 7616a0cfe1d10e71225f2291470ec3775655eafd..dc7147c7eba859bc2bd458ecd53c904516222811 100644 --- a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TRAINING/global.template.sql +++ b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TRAINING/global.template.sql @@ -76,6 +76,10 @@ from statistics where colname in (select code from defaultDB.globalmetadatatbl where categorical= 0) and colname <> '%{y}'; + +select Naive_Bayes_Training_inputerrorchecking('%{y}',no) from (select count(distinct classval) as no from global_probabilities); + + --select * from global_probabilities; var 'jsonResult' from select '{ "type": "application/json", "data": ' || componentresult || ', "dbIdentifier": ' || '%{dbIdentifier}' || '}' from ( select tabletojson(colname,val,classval,average,sigma,probability, "colname,val,classval,average,sigma,probability",0) as componentresult diff --git a/Exareme-Docker/src/mip-algorithms/THREE_C/__init__.py b/Exareme-Docker/src/mip-algorithms/THREE_C/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9c2a571a5dc96cdf8de44869a5a060eef8f5a8cf --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/THREE_C/__init__.py @@ -0,0 +1,3 @@ +from .threec import ThreeC + +__all__ = ["ThreeC"] diff --git a/Exareme-Docker/src/mip-algorithms/THREE_C/local.py b/Exareme-Docker/src/mip-algorithms/THREE_C/local.py new file mode 100644 index 0000000000000000000000000000000000000000..41b837a1104503903da098ae08bb4dc851242afe --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/THREE_C/local.py @@ -0,0 +1,10 @@ +import sys +from threec import ThreeC + + +def main(args): + ThreeC(args[1:]).local_pure() + + +if __name__ == "__main__": + ThreeC(sys.argv[1:]).local_pure() diff --git a/Exareme-Docker/src/mip-algorithms/THREE_C/properties.json b/Exareme-Docker/src/mip-algorithms/THREE_C/properties.json new file mode 100644 index 0000000000000000000000000000000000000000..d7b7990f4bc4a78ab8cd0c02592abd02854a026d --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/THREE_C/properties.json @@ -0,0 +1,170 @@ +{ + "name": "THREE_C", + "desc": "Categorization, Clustering and Classification", + "label": "3C", + "type": "python_local", + "status": "enabled", + "parameters": [ + { + "name": "y", + "label": "y", + "desc": "Potential biomarkers. Imaging, Proteins, Genetics etc.", + "type": "column", + "columnValuesSQLType": "real, integer, text", + "columnValuesIsCategorical": "", + "value": "lefthippocampus, righthippocampus, leftcaudate", + "valueNotBlank": true, + "valueMultiple": true, + "valueType": "string" + }, + { + "name": "x", + "label": "x", + "desc": "Clinical measurements. Disease symptoms, Patients' functional state and abilities.", + "type": "column", + "columnValuesSQLType": "real, integer, text", + "columnValuesIsCategorical": "", + "value": "apoe4, gender, agegroup", + "valueNotBlank": true, + "valueMultiple": true, + "valueType": "string" + }, + { + "name": "pathology", + "label": "pathology", + "desc": "The name of the pathology that the dataset belongs to.", + "type": "pathology", + "value": "dementia", + "valueNotBlank": true, + "valueMultiple": false, + "valueType": "string" + }, + { + "name": "dataset", + "label": "dataset", + "desc": "It contains the names of one or more datasets, in which the algorithm will be executed. 
It cannot be empty", + "type": "dataset", + "value": "adni", + "valueNotBlank": true, + "valueMultiple": true, + "valueType": "string" + }, + { + "name": "filter", + "label": "filter", + "desc": "", + "type": "filter", + "value": "", + "valueNotBlank": false, + "valueMultiple": true, + "valueType": "string" + }, + { + "name": "dx", + "label": "dx", + "desc": "Diagnosis.", + "type": "other", + "value": "alzheimerbroadcategory", + "valueNotBlank": true, + "valueMultiple": false, + "valueType": "string" + }, + { + "name": "c2_feature_selection_method", + "label": "c2_feature_selection_method", + "desc": "", + "type": "other", + "value": "RF", + "defaultValue": "RF", + "valueNotBlank": true, + "valueMultiple": false, + "valueType": "string", + "enumValues": [ + "RF", + "AIC", + "AIC_MSFDR", + "BIC" + ] + }, + { + "name": "c2_num_clusters_method", + "label": "c2_num_clusters_method", + "desc": "", + "type": "other", + "value": "Euclidean", + "defaultValue": "Euclidean", + "valueNotBlank": true, + "valueMultiple": false, + "valueType": "string", + "enumValues": [ + "Euclidean", + "Manhattan", + "hclust_Euclidean", + "hclust_Manhattan" + ] + }, + { + "name": "c2_num_clusters", + "label": "c2_num_clusters", + "desc": "", + "type": "other", + "value": "6", + "defaultValue": "6", + "valueMin": 2, + "valueNotBlank": true, + "valueMultiple": false, + "valueType": "integer" + }, + { + "name": "c2_clustering_method", + "label": "c2_clustering_method", + "desc": "", + "type": "other", + "value": "Euclidean", + "defaultValue": "Euclidean", + "valueNotBlank": true, + "valueMultiple": false, + "valueType": "string", + "enumValues": [ + "Euclidean", + "Manhattan", + "Heuclidean", + "Hmanhattan" + ] + }, + { + "name": "c3_feature_selection_method", + "label": "c3_feature_selection_method", + "desc": "", + "type": "other", + "value": "RF", + "defaultValue": "RF", + "valueNotBlank": true, + "valueMultiple": false, + "valueType": "string", + "enumValues": [ + "RF", + "AIC", + "AIC_MSFDR", + "BIC" + ] + }, + { + "name": "c3_classification_method", + "label": "c3_classification_method", + "desc": "", + "type": "other", + "value": "RF", + "defaultValue": "RF", + "valueNotBlank": true, + "valueMultiple": false, + "valueType": "string", + "enumValues": [ + "RF", + "RF_downsampling", + "CART_information", + "CART_gini" + ] + } + ] +} diff --git a/Exareme-Docker/src/mip-algorithms/THREE_C/threec.R b/Exareme-Docker/src/mip-algorithms/THREE_C/threec.R new file mode 100644 index 0000000000000000000000000000000000000000..8d20001dbf0c66bc312c04aadf747a61e5686ace --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/THREE_C/threec.R @@ -0,0 +1,18 @@ +library(CCC) + +data(c3_sample1) +data(c3_sample1_categories) + +x <- get_xy_from_DATA_C2(c3_sample1, c3_sample1_categories)$x +y <- get_xy_from_DATA_C2(c3_sample1, c3_sample1_categories)$y + +C2_results <- C2(x, y, feature_selection_method="RF", num_clusters_method="Manhattan", clustering_method="Manhattan", plot.num.clus=TRUE, plot.clustering=TRUE, k=6) + +C2_results + +PBx <- get_PBx_from_DATA_C3(c3_sample1, c3_sample1_categories) +new_y <- C2_results[[3]] + +C3_results <- C3(PBx = PBx, newy = new_y, feature_selection_method = "RF", classification_method="RF") + +table(new_y, C3_results[[2]]) diff --git a/Exareme-Docker/src/mip-algorithms/THREE_C/threec.py b/Exareme-Docker/src/mip-algorithms/THREE_C/threec.py new file mode 100755 index 0000000000000000000000000000000000000000..ae04da9c17589c136649f2556b6eaea39af76ccb --- /dev/null +++ 
b/Exareme-Docker/src/mip-algorithms/THREE_C/threec.py @@ -0,0 +1,1255 @@ +#!/usr/bin/env python + +from __future__ import print_function +from __future__ import division + + +import rpy2.robjects as robjects +from rpy2.robjects import pandas2ri +import pandas as pd +import numpy as np + +from mipframework import Algorithm, AlgorithmResult, TabularDataResource + + +class ThreeC(Algorithm): + def __init__(self, cli_args): + super(ThreeC, self).__init__(__file__, cli_args, intercept=False) + + def local_pure(self): + c2_feature_selection_method = self.parameters.c2_feature_selection_method + c2_num_clusters_method = self.parameters.c2_num_clusters_method + c2_clustering_method = self.parameters.c2_clustering_method + # ======================================================================= + # NOTE: number_of_clusters parameter default value doesn't work in R code + # ======================================================================= + c2_num_clusters = int(self.parameters.c2_num_clusters) + + c3_feature_selection_method = self.parameters.c3_feature_selection_method + c3_classification_method = self.parameters.c3_classification_method + + cm_names = self.parameters.x + pb_names = self.parameters.y + markers_and_biomarkers = self.data.full[cm_names + pb_names] + diag_name = self.parameters.dx + diagnosis = self.data.db.select_vars_from_data( + [diag_name], self.parameters.dataset, self.parameters.filter + ) + full_data = pd.concat([diagnosis, markers_and_biomarkers], axis=1) + + var_names = [diag_name] + cm_names + pb_names + var_categories = ["DX"] + ["CM"] * len(cm_names) + ["PB"] * len(pb_names) + full_metadata = pd.DataFrame( + {"varName": var_names, "varCategory": var_categories} + ) + + r_data = pandas2ri.py2ri(full_data) + r_md = pandas2ri.py2ri(full_metadata) + robjects.globalenv["data"] = r_data + robjects.globalenv["metadata"] = r_md + + define_r_funcs() + + robjects.r( + """ + x <- get_xy_from_DATA_C2(data, metadata)$x + y <- get_xy_from_DATA_C2(data, metadata)$y + """ + ) + robjects.r( + """ + C2_results <- C2(x, + y, + feature_selection_method="{fsm}", + num_clusters_method="{ncm}", + clustering_method="{cm}", + plot.num.clus=TRUE, + plot.clustering=TRUE, + k={nc} + ) + """.format( + fsm=c2_feature_selection_method, + ncm=c2_num_clusters_method, + cm=c2_clustering_method, + nc=c2_num_clusters, + ) + ) + robjects.r( + """ + PBx <- get_PBx_from_DATA_C3(data, metadata) + new_y <- C2_results[[3]] + """ + ) + robjects.r( + """ + C3_results <- C3(PBx = PBx, + newy = new_y, + feature_selection_method = "{fsm}", + classification_method="{cm}" + ) + result <- table(new_y, C3_results[[2]]) + """.format( + fsm=c3_feature_selection_method, cm=c3_classification_method + ) + ) + res = np.array(list(robjects.globalenv["result"])) + res = res.reshape(c2_num_clusters, c2_num_clusters).tolist() + + table_out = TabularDataResource( + fields=[str(i + 1) for i in range(len(res))], + data=tuple(res), + title="3C result", + ) + self.result = AlgorithmResult( + raw_data=dict(), tables=[table_out], highcharts=[], + ) + + +def define_r_funcs(): + rdef_get_xy_from_DATA_C2() + rdef_feature_selection() + rdef_Feature_Selection_dummy_regressions() + rdef_Feature_Selection_RF() + rdef_Feature_Selection_BIC() + rdef_MSFDR() + rdef_Feature_Selection_AIC_MSFDR() + rdef_Feature_Selection_AIC() + rdef_FDR_selection() + rdef_number_of_clusters() + rdef_k_euclidean() + rdef_k_manhattan() + rdef_khclust_euc() + rdef_khclust_man() + rdef_clustering() + rdef_cluster_euclidean() + rdef_cluster_manhattan() + 
rdef_hclust_euc() + rdef_hclust_man() + rdef_C2() + rdef_get_PBx_from_DATA_C3() + rdef_C3() + rdef_classification_fun() + rdef_RF_classify() + rdef_RF_one_by_one() + rdef_cart_function() + + +def rdef_get_xy_from_DATA_C2(): + robjects.r( + """ + #' Title get_xy_from_DATA_C2 + #' + #' @param DATA Full data matrix, includes all observations for all the variables + #' @param META_DATA Need to have at least 2 columns, one with all variables name, another one which indicate + #' the type of each variable (CM, DX, PB) + #' + #' @return a list of important variables + #' + #' @export + #' + #' @examples + #' # x <- get_xy_from_DATA_C2(DATA, META_DATA)[[1]] + #' # y <- get_xy_from_DATA_C2(DATA, META_DATA)[[2]] + get_xy_from_DATA_C2 <- function(DATA, META_DATA) { + # DATA META_DATA + x <- DATA[, META_DATA$varName[META_DATA$varCategory == "CM"]] + y <- DATA[, META_DATA$varName[META_DATA$varCategory == "DX"]] + list(x = x, y = y) + } + """ + ) + + +# ================= +# Feature Selection +# ================= +def rdef_feature_selection(): + robjects.r( + """ + #' Title Features Selection + #' + #' @param x Data matrix + #' @param y Dependent variable + #' @param method The method to be used for the feature selection: Random forest, AIC, AIC with MSFDR or BIC + #' @param ... further arguments to be passed to or from other methods + #' + #' @return a list of important variables + #' + #' @export + #' + #' @examples + #' # feature_selection(x, y, method='RF') + #' # feature_selection(x[, 1:30], y, method='BIC') + #' # feature_selection(x, y, method='FDR_screening') + feature_selection <- function(x, y, method = "RF", ...) { + if (method == "RF") { + output <- Feature_Selection_dummy_regressions(x, y, Feature_Selection_RF, + ...) # ('...' : p) + } + if (method == "AIC_MSFDR") { + output <- Feature_Selection_dummy_regressions(x, y, Feature_Selection_AIC_MSFDR, + ...) # ('...' : q, print.the.steps) + } + if (method == "BIC") { + output <- Feature_Selection_dummy_regressions(x, y, Feature_Selection_BIC, + ...) # ('...' : nbest, nvmax, nmin, plot) + } + if (method == "AIC") { + output <- Feature_Selection_dummy_regressions(x, y, Feature_Selection_AIC) + } + if (method == "FDR_screening") { + output <- Feature_Selection_dummy_regressions(x, y, FDR_selection, + ...) # ('...' : q, eta) + } + if (method == "LASSO") { + output <- Feature_Selection_dummy_regressions(x, y, LASSO_selection) + } + return(output) + } + """ + ) + + +def rdef_Feature_Selection_dummy_regressions(): + robjects.r( + """ + #' Finds a subset of variables based on all dummy regressions + #' Title Feature Selection Dummy Regression + #' + #' @param x Data matrix + #' @param y Dependent variable + #' @param FUN Indicating which method to use for feature selection + #' @param ... further arguments to be passed to or from other methods + #' + #' @return a vector with the names of the important variables + #' @export + #' + #' @examples + #' Feature_Selection_dummy_regressions(x, y, Feature_Selection_RF) + #' + Feature_Selection_dummy_regressions <- function(x, y, FUN, ...) { + + u_y <- unique(y) + selected_variables <- list() + + for (i in seq_along(u_y)) { + dummy_y <- as.numeric(y == u_y[i]) + # FUN(x, y, ...) + selected_variables[[i]] <- FUN(x, dummy_y, ...) 
+        }
+
+        # Union of all selected variables
+        unique(unlist(selected_variables))
+    }
+    """
+    )
+
+
+# =================================
+# Feature Selection - sub-functions
+# =================================
+
+# ==============
+# Random Forests
+# ==============
+def rdef_Feature_Selection_RF():
+    robjects.r(
+        """
+    #' Title Feature Selection Using Random Forest
+    #'
+    #' @param x Data matrix
+    #' @param y Categorical dependent variable (factor)
+    #' @param p Percentage of the number of variables to be chosen from x. Default value is 0.1.
+    #' @return list with the top p percentage of the variables, ranked by permutation importance (MeanDecreaseAccuracy).
+    #'
+    #' @export
+    #'
+    #' @examples
+    #' # Feature_Selection_RF(x, y, p = 0.1)
+    #'
+    Feature_Selection_RF <- function(x, y, p = 0.1) {
+        library(randomForest)
+
+        if (!is.factor(y)) {
+            warning("y is not a factor - but was coerced into one.")
+            y <- as.factor(y)
+        }
+
+        rf_DX_by_CM <- randomForest(y ~ ., data = x, importance = TRUE, proximity = TRUE)
+
+        var_import <- importance(rf_DX_by_CM)[, "MeanDecreaseAccuracy"]
+        m <- round(dim(x)[2] * p)  # Keep only a fraction p (by default 10%) of the variables
+        subset_vars <- sort(var_import, decreasing = TRUE)[1:m]  # Sort the variables by importance and keep the top m
+        important_var_RF <- names(subset_vars)
+
+        return(unlist(important_var_RF))
+    }
+    """
+    )
+
+
+# ===
+# BIC
+# ===
+def rdef_Feature_Selection_BIC():
+    robjects.r(
+        """
+    #' Title Feature Selection Using BIC
+    #'
+    #' @param x Data matrix
+    #' @param y response vector (must be numeric?)
+    #' @param nbest number of subsets of each size to record
+    #' @param nvmax maximum size of subsets to examine
+    #' @param nmin minimum number of variables to be included in the suggested final model
+    #' @param plot.BIC if TRUE the function plots a table of models showing which variables are in each model. Default is FALSE.
+    #' The models are ordered by the specified model selection statistic.
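+    #' (Illustrative sketch: with nbest = 1 and nvmax = 12, regsubsets keeps the
+    #' single best model of each size 1..12, and the rule below then picks, among
+    #' the models with at least nmin variables, the one with minimum BIC.)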
+    #' @return
+    #' vector with the names of the variables of the model with minimum BIC among the models including more than 'nmin' variables of the regsubsets object
+    #' @export
+    #'
+    #' @examples
+    #' # Feature_Selection_BIC(x[, 1:30], y, nbest=1, nvmax=5, plot.BIC=TRUE, nmin=4)
+    Feature_Selection_BIC <- function(x, y, nbest = 1, nvmax = 12, nmin = 4,
+        plot.BIC = FALSE) {
+        library(leaps)
+        library(car)
+        fulldata <- data.frame(x, y)  # Creating one joint data.frame of the data
+        RET <- regsubsets(y ~ ., data = fulldata, nbest = nbest, nvmax = nvmax,
+            really.big = TRUE)
+        # if (plot.BIC) { plot(RET, scale = 'bic') }
+        summary_RET <- summary(RET)  # Saving the summary of the regsubsets output
+        help_mat <- matrix(as.numeric(summary_RET$which), nrow = (nvmax * nbest),
+            ncol = (dim(x)[2] + 1))  # Which variables were chosen for each model
+        num_var_each_model <- apply(help_mat, 1, sum)  # Counting the number of variables chosen for each model
+        chosen_models <- summary_RET$bic[which(num_var_each_model >= nmin)]  # Saving the BIC values of the models which include more than 'nmin' variables
+        ind_model_min_BIC <- which(chosen_models == min(chosen_models))  # Which model with more than 'nmin' variables has the minimum BIC
+
+        return(unlist(colnames(x)[which(help_mat[ind_model_min_BIC, ] == 1) -
+            1]))
+    }
+    """
+    )
+
+
+# ============
+# AIC with FDR
+# ============
+def rdef_MSFDR():
+    robjects.r(
+        """
+    #' Title Forward Selection Using AIC Criteria and MSFDR Procedure
+    #'
+    #' @param minimal.lm lm function output of the model which includes an intercept
+    #' @param maximal.lm lm function output of the model which does not include an intercept
+    #' @param q Significance level. Default is 0.05.
+    #' @param print.the.steps if TRUE the Lambda, model size, and final model at each iteration will be printed;
+    #' Default is FALSE
+    #' @param print.running.time if TRUE the running time will be printed; defaults to the value of print.the.steps
+    #' @return
+    #' Final model, running time, summary of AIC_MSFDR object
+    #' @export
+    #'
+    #' @examples
+    #' # Feature_Selection_AIC_MSFDR(x, y, q = 0.5, print.the.steps = FALSE)
+    #'
+    MSFDR <- function(minimal.lm, maximal.lm, q, print.the.steps, print.running.time = print.the.steps) {
+        # computes forward model selection using the multiple stage FDR
+        # controlling procedure (MSFDR)
+
+        if (!(class(minimal.lm) == "lm" & class(maximal.lm) == "lm")) {
+            # stop() is used here since break is only valid inside a loop
+            stop("one of the models you entered isn't a linear model (lm), please try fitting lm only")
+        }
+
+        if (print.running.time)
+            time <- proc.time()
+
+        library(MASS)
+        algorithm.direction <- "forward"  # always forward
+        the.scope <- list(lower = minimal.lm, upper = maximal.lm)
+        trace.stepAIC <- ifelse(print.the.steps, 1, 0)
+        iteration.number <- 1
+
+        m <- extractAIC(maximal.lm)[1] - 1  # check if the full model should include the intercept or not !!!!!!
+        i <- max(extractAIC(minimal.lm)[1] - 1, 1)  # so if the model is with intercept only, the i size won't be 0.
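+        # Worked example of the threshold below (illustrative): with q = 0.05,
+        # i = 1 and m = 10, Lambda = qnorm(1 - 0.5 * 0.05 * 1 / (10 + 1 - 0.95))^2
+        # = qnorm(0.99751)^2 ~ 2.81^2 ~ 7.9, a far stricter penalty than AIC's k = 2.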
+ # q = .05 # default + + Lambda <- qnorm((1 - 0.5 * q * i/(m + 1 - i * (1 - q))))^2 + + if (print.the.steps) { + print(paste("Starting Lambda is: ", Lambda)) + } + + # first step of the algorithm + new.lm <- stepAIC(minimal.lm, direction = algorithm.direction, scope = the.scope, + k = Lambda, trace = trace.stepAIC) + new.lm.model.size <- extractAIC(new.lm)[1] - 1 + + + while (new.lm.model.size > i) { + iteration.number <- iteration.number + 1 + + if (print.the.steps) { + print("=========================================") + print("=========================================") + print(paste("iteration number: ", iteration.number)) + print(paste("current model size is:", new.lm.model.size, ">", + i, " (which is bigger then the old model size)")) + } + + i <- new.lm.model.size + Lambda <- qnorm((1 - 0.5 * q * i/(m + 1 - i * (1 - q))))^2 + + if (print.the.steps) { + print(paste("new Lambda is: ", Lambda)) + } + + new.lm <- stepAIC(new.lm, direction = algorithm.direction, scope = the.scope, + k = Lambda, trace = trace.stepAIC) + + new.lm.model.size <- extractAIC(new.lm)[1] - 1 + } + + + if (print.the.steps) { + print("=========================================") + print("=========================================") + print("=========================================") + print("The final model is: ") + print(new.lm$call) + } + + if (print.running.time) { + print("") + print("Algorithm running time was:") + print(proc.time() - time) + } + + return(new.lm) + + } + """ + ) + + +def rdef_Feature_Selection_AIC_MSFDR(): + robjects.r( + """ + # TODO: MSFDR does NOT (!!!) work with non-numeric values. Using it for + # factors, will produce very wrong results It should be considered if + # to extend it to also work with factors (e.g.: through multinomial + # regression) + Feature_Selection_AIC_MSFDR <- function(x, y, q = 0.05, print.the.steps = FALSE) { + y <- as.numeric(y) + fulldata <- data.frame(x, y = y) + # Creating one joint data.frame of the data defining the smallest and + # largest lm we wish to progress through + smallest_linear_model <- lm(y ~ +1, data = fulldata) + largest_linear_model <- lm(y ~ ., data = fulldata) + + # Implementing the MSFDR functions (with q = 0.05) + AIC_MSDFR <- MSFDR(minimal.lm = smallest_linear_model, maximal.lm = largest_linear_model, + q, print.the.steps) + sum <- summary(AIC_MSDFR) # Saving the summary of the AIC.MSFDR procedure + important_var_FDR <- which(!is.na(AIC_MSDFR$coeff)) + important_var_FDR <- names(important_var_FDR) + + return(unlist(important_var_FDR[2:length(important_var_FDR)])) + } + """ + ) + + +# =================== +# AIC without FDR ### +# =================== +def rdef_Feature_Selection_AIC(): + robjects.r( + """ + #' Title Feature Selection Using AIC + #' + #' @param x data matrix + #' @param y categorical variable (factor) + #' + #' @return + #' Returns a list with two items. The first is a list of important variables. The second + #' is NA if print.summary.AIC==FALSE or the summary of AIC if TRUE. + #' @export + #' + #' @examples + #' # Feature_Selection_AIC(x, y) + Feature_Selection_AIC <- function(x, y) { + library(MASS) + y <- as.numeric(y) + fulldata <- data.frame(x, y) # Creating one joint data.frame of the data + smallest_linear_model <- lm(y ~ +1, data = fulldata) + largest_linear_model <- lm(y ~ . 
+ 1, data = fulldata)
+
+        AIC_procedure <- stepAIC(object = smallest_linear_model, scope = list(lower = smallest_linear_model,
+            upper = largest_linear_model), direction = "forward", trace = FALSE)
+        important_var_AIC <- names(AIC_procedure$coeff)
+
+        return(unlist(important_var_AIC[2:length(important_var_AIC)]))  # Dropping the '(Intercept)' entry
+    }
+    """
+    )
+
+
+# ==================================
+# FDR Selection (F and Chi-sq tests)
+# ==================================
+def rdef_FDR_selection():
+    robjects.r(
+        """
+    #' Title Feature Selection Using FDR selection
+    #'
+    #' @param x data matrix
+    #' @param y categorical variable (factor)
+    #' @param q adjusted p value threshold level. The chosen variables will have adjusted p value smaller than q
+    #' @param eta eta squared threshold, the chosen variables will have eta value greater than eta.
+    #'
+    #' @return
+    #' Returns a list of the selected variables
+    #' @export
+    #'
+    #' @examples
+    #' # FDR_selection(x, y, q = 0.001, eta = 0.1)
+    FDR_selection <- function(x, y, q = 0.05, eta = 0.1) {
+
+        if (!is.factor(y)) {
+            warning("y is not a factor - but was coerced into one.")
+            y <- as.factor(y)
+        }
+
+        eta_squared <- rep(NA, dim(x)[2])
+        original_p_val <- rep(NA, dim(x)[2])
+        for (i in 1:dim(x)[2]) {
+            # variable is discrete: all dim(x)[1] observations are integer-valued
+            if (sum(floor(x[, i]) == x[, i]) == dim(x)[1]) {
+                original_p_val[i] <- chisq.test(x = x[, i], y)$p.value
+                eta_squared[i] <- summary.lm(lm(as.factor(x[, i]) ~ as.factor(y)))$r.squared
+            } else {
+                # variable is not discrete
+                anova_model <- anova(lm(x[, i] ~ y + 0))
+                original_p_val[i] <- anova_model[[5]][1]
+                eta_squared[i] <- summary.lm(lm(x[, i] ~ as.factor(y)))$r.squared
+            }
+        }
+        names(original_p_val) <- colnames(x)
+        adjust_p_val <- p.adjust(original_p_val, method = "BH")
+
+        is_smaller <- ifelse(adjust_p_val < q & eta_squared > eta, 1, 0)
+        screening <- data.frame("var" = names(original_p_val), original_p_val, adjust_p_val,
+            eta_squared, is_smaller, row.names = c(1:length(original_p_val)))
+        keep_vars <- screening$var[which(is_smaller == 1)]
+        screening <- screening[order(original_p_val), ]
+
+
+        return(as.character(keep_vars))
+    }
+    #' Title LASSO
+    #'
+    #' @param x Data matrix
+    #' @param y Dependent variable
+    #'
+    #' @return
+    #' the data matrix reduced to the variables selected by the LASSO
+    #'
+    #' @export
+    #'
+    #' @examples
+    #' # LASSO_selection(x, y)
+    # LASSO_selection<-function(x, y) { cvfit <- cv.glmnet(as.matrix(x), y)
+    # important_var_LASSO <- as.matrix(coef(cvfit, s = 'lambda.1se'))
+    # important_var_LASSO <- important_var_LASSO[important_var_LASSO[, 1]
+    # != 0, ] important_var_LASSO <-
+    # important_var_LASSO[names(important_var_LASSO) != '(Intercept)']
+    # reduced_x <- x[, names(important_var_LASSO)] return(reduced_x) }
+    """
+    )
+
+
+# ======================================================
+# Deciding on number of clusters and clustering the data
+# ======================================================
+def rdef_number_of_clusters():
+    robjects.r(
+        """
+    #' Title Deciding on Number of Clusters
+    #'
+    #' @param x Data matrix
+    #' @param method character string indicating how the "optimal" number of clusters is found: Euclidean (default), Manhattan,
+    #' hierarchical Euclidean or hierarchical Manhattan
+    #' @param K.max the maximum number of clusters to consider, must be at least two. Default value is 10.
+    #' @param B integer, number of Monte Carlo ("bootstrap") samples. Default value is 100.
+    #' @param verbose integer or logical, determining if "progress" output should be printed.
The default prints + #' one bit per bootstrap sample. Default value is FALSE. + #' @param scale if TRUE (default) the data matrix will be scaled. + #' @param diss if TRUE (default as FALSE) x will be considered as a dissimilarity matrix. + #' @param cluster.only if true (default as FALSE) only the clustering will be computed and returned, see details. + #' @param plot.num.clus if TRUE (default) the gap statistic plot will be printed + #' + #' @return + #' plot and table which advises how many clusters should be + #' + #' @export + #' + #' @examples + #' # number_of_clusters(subx, B=50, method='Euclidean') + #' + number_of_clusters <- function(x, method = "Euclidean", K.max = 10, B = 100, + verbose = FALSE, plot.num.clus = TRUE, scale = TRUE, diss = FALSE, + cluster.only = TRUE) { + # scale + if (scale) { + x <- scale(x) + } + + # TODO: what we SHOULD do is pass Euclidean/Man to the functions, as + # well as hclust vs pam... + + if (method == "Euclidean") { + k_clusters <- k_euclidean(x, K.max, B, verbose, plot.num.clus) + } + if (method == "Manhattan") { + k_clusters <- k_manhattan(x, K.max, diss, B, cluster.only, verbose, + plot.num.clus) + } + if (method == "hclust_Euclidean") { + k_clusters <- khclust_euc(x, K.max, B, verbose, plot.num.clus) + + } + if (method == "hclust_Manhattan") { + k_clusters <- khclust_man(x, K.max, B, verbose, plot.num.clus) + + } + return(list(k_clusters)) + } + """ + ) + + +def rdef_k_euclidean(): + robjects.r( + """ + #' Title Gap statisic with k-medoids euclidean + #' + #' @param x Data matrix + #' @param K.max the maximum number of clusters to consider, must be at least two. Default value is 10. + #' @param B integer, number of Monte Carlo ("bootstrap") samples. Default value is 100. + #' @param verbose integer or logical, determining if "progress" output should be printed. The default prints + #' one bit per bootstrap sample. Default value is FALSE. + #' @param plot.num.clus if TRUE (default) the gap statistic plot will be printed + #' + #' @return the clusGap function' values + #' @export + #' + #' @examples + #' # k_euclidean(subx, K.max=8, B=50, verbose=FALSE, plot.num.clus=TRUE) + #' + k_euclidean <- function(x, K.max, B, verbose, plot.num.clus) { + library(cluster) + library(clusterCrit) + + clusGap_best <- cluster::clusGap(x, FUN = pam, K.max = K.max, B, verbose) + + + if (plot.num.clus) { + plot(clusGap_best, main = "Gap Statistic for k-medoids Euclidean") + } + # # Silhouette Criteria for k-medoids sil <- c(rep(NA, 10)) sil[1] <- 0 + # max_sil <- 0 clust_num_sil <- 0 for (i in 2:10) { clust <- pam(x, i, + # diss = FALSE) sil[i] <- intCriteria(x, clust$cluster, 'Silhouette') + # if (as.numeric(sil[i]) > max_sil) { max_sil_means <- sil[i] + # clust_num_sil <- i } } if (plot.num.clus) { plot(as.numeric(sil), + # type = 'l', main = 'Silhouette criteria k-medoids Euclidean') } + + # return(list(clusGap_best, clust)) + return(list(clusGap_best)) + } + """ + ) + + +def rdef_k_manhattan(): + robjects.r( + """ + #' Title Gap statisic with k-medoids manhattan + #' + #' @param x data matrix + #' @param K.max positive integer specifying the number of clusters, less than the number of observations. + #' Default value is 10. + #' @param diss if TRUE (default as FALSE) x will be considered as a dissimilarity matrix + #' @param B integer, number of Monte Carlo ("bootstrap") samples. Default value is 100. + #' @param cluster.only if true (default) only the clustering will be computed and returned, see details. 
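+    #' (in this file k_manhattan is reached through number_of_clusters, which
+    #' passes cluster.only = TRUE, so the pam wrapper below returns just the
+    #' cluster vector, which is all that clusGap needs)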
+ #' @param verbose integer or logical, determining if "progress" output should be printed. The default prints + #' one bit per bootstrap sample. Default as FALSE. + #' @param plot.num.clus if TRUE (default) the gap statistic plot will be printed + #' @param ... another objects of pam function + #' + #' @return clusGap function' output + #' @export + #' + #' @examples + #' # k_manhattan (subx, K.max = 8, diss=FALSE, B = 50, cluster.only = TRUE, verbose = FALSE) + #' + k_manhattan <- function(x, K.max, diss, B, cluster.only, verbose, plot.num.clus) { + library(cluster) + library(clusterCrit) + library(magrittr) + library(fpc) + + pam_1 <- function(x, k, ...) { + clusters <- x %>% pam(k = k, diss = diss, metric = "manhattan", + cluster.only = cluster.only) + list(clusters = clusters) + } + set.seed(40) + clusGap_best <- clusGap(x, FUN = pam_1, K.max = K.max, B = B, verbose = verbose) + + if (plot.num.clus) { + + plot(clusGap_best, main = "Gap Statistic for k-medoids Manhattan") + } + # #Silhouette criteria with k-medoids manhattan + # sil_med_m<-c(rep(NA,10)) sil_med_m[1]<-0 max_sil_med_m<-0 + # clust_num_sil_med_m<-0 for (i in 2:10) { + # clust_med_m<-pam(Scaled_Reduced_CM_trans,i,diss=FALSE,metric='manhattan') + # sil_med_m[i]<-intCriteria(Scaled_Reduced_CM_trans,clust_med_m$cluster,'Silhouette') + # if (as.numeric(sil_med_m[i]) > max_sil_med_m) { + # max_sil_med_m<-sil_med_m[i] clust_num_sil_med_m<-i } } + # plot(as.numeric(sil_med_m),type='l',main='Silhouette criteria, + # k-medoids manhattan') + return(list(clusGap_best)) + } + """ + ) + + +def rdef_khclust_euc(): + robjects.r( + """ + #' Title Gap statistics for hclust Euclidean + #' + #' @param x data matrix + #' @param K.max positive integer specifying the number of clusters, less than the number of observations. + #' @param B integer, number of Monte Carlo ("bootstrap") samples + #' @param verbose integer or logical, determining if "progress" output should be printed. The default prints + #' one bit per bootstrap sample + #' @param plot.num.clus if TRUE (default) the gap statistic plot will be printed + #' + #' @return the clusGap function output + #' @export + #' + #' @examples + #' # khclust_euc(subx,K.max=10, B=60, verbose = FALSE, plot.num.clus=TRUE ) + #' + khclust_euc <- function(x, K.max, B, verbose, plot.num.clus) { + hclust_k_euc <- function(x, k, ...) { + library(magrittr) + library(cluster) + clusters <- x %>% dist %>% hclust %>% cutree(k = k) + list(clusters = clusters) + } + + clusGap_best <- clusGap(x, FUN = hclust_k_euc, K.max = K.max, B = B, + verbose = verbose) + if (plot.num.clus) { + plot(clusGap_best, main = "Gap statistic, hclust Euclidean") + } + return(clusGap_best) + } + """ + ) + + +def rdef_khclust_man(): + robjects.r( + """ + #' Title Gap statistics for hclust Manhattan + #' + #' @param x data matrix + #' @param K.max positive integer specifying the number of clusters, less than the number of observations. + #' Default value is 10 + #' @param B integer, number of Monte Carlo ("bootstrap") samples. Default value is 100. + #' @param verbose integer or logical, determining if "progress" output should be printed. The default prints + #' one bit per bootstrap sample. Default value is FALSE. 
+    #' @param plot.num.clus if TRUE (default) the gap statistic plot will be printed
+    #'
+    #' @return the clusGap function output
+    #' @export
+    #'
+    #' @examples
+    #' # khclust_man(subx, K.max=8, B=60, verbose=FALSE, plot.num.clus=TRUE)
+    #'
+    khclust_man <- function(x, K.max, B, verbose, plot.num.clus) {
+        hclust_k_man <- function(x, k, ...) {
+            library(magrittr)
+            clusters <- x %>% dist(method = "manhattan") %>% hclust %>% cutree(k = k)
+            list(clusters = clusters)
+        }
+
+        clusGap_best <- clusGap(x, FUN = hclust_k_man, K.max = K.max, B = B,
+            verbose = verbose)
+        if (plot.num.clus) {
+            plot(clusGap_best, main = "Gap statistic, hclust Manhattan")
+        }
+        return(list(clusGap_best))
+    }
+    """
+    )
+
+
+# =====================
+# Clustering the data #
+# =====================
+def rdef_clustering():
+    robjects.r(
+        """
+    #' Title Clustering
+    #'
+    #' @param x data matrix
+    #' @param k.gap positive integer specifying the number of clusters, less than the number of observations. Default value is 2.
+    #' @param method Indicating which method to use for clustering. Default is 'Euclidean'.
+    #' @param plot.clustering if TRUE (default FALSE) a 2-dimensional "clusplot" plot will be printed
+    #'
+    #' @return vector with the new assigned clusters
+    #' @export
+    #'
+    #' @examples
+    #' clustering(subx, k.gap = 5, method='Euclidean', plot.clustering=TRUE)
+    #'
+    clustering <- function(x, k.gap = 2, method = "Euclidean", plot.clustering = FALSE) {
+
+        if (method == "Euclidean") {
+            clusters <- cluster_euclidean(x, k.gap, plot.clustering)
+        }
+        if (method == "Manhattan") {
+            clusters <- cluster_manhattan(x, k.gap, plot.clustering)
+        }
+        if (method == "Heuclidean") {
+            # hierarchical variant with Euclidean distances
+            clusters <- hclust_euc(x, k.gap, plot.clustering)
+        }
+        if (method == "Hmanhattan") {
+            # hierarchical variant with Manhattan distances
+            clusters <- hclust_man(x, k.gap, plot.clustering)
+        }
+        return(clusters)
+    }
+    """
+    )
+
+
+# =========
+# Euclidean
+# =========
+def rdef_cluster_euclidean():
+    robjects.r(
+        """
+    #' Title Clustering Using Euclidean distances
+    #'
+    #' @param x data matrix
+    #' @param k.gap positive integer specifying the number of clusters, less than the number of observations.
+    #' @param plot.clustering if TRUE a 2-dimensional "clusplot" plot will be printed
+    #'
+    #' @return
+    #' vector with the new assigned clusters
+    #'
+    #' @export
+    #'
+    #' @examples
+    #' # cluster_euclidean(subx, k.gap = 5, plot.clustering = TRUE)
+    #'
+    cluster_euclidean <- function(x, k.gap, plot.clustering) {
+        library(cluster)
+        pam_4 <- pam(x, k.gap, diss = FALSE)
+        if (plot.clustering) {
+            clusplot(x, pam_4$cluster, color = TRUE, main = c("k-medoids,",
+                paste(k.gap), "clusters"))
+        }
+        clusters <- pam_4$cluster
+
+        return(unlist(clusters))
+    }
+    """
+    )
+
+
+# =========
+# Manhattan
+# =========
+def rdef_cluster_manhattan():
+    robjects.r(
+        """
+    #' Title Clustering Using Manhattan Distances
+    #'
+    #' @param x data matrix
+    #' @param k.gap positive integer specifying the number of clusters, less than the number of observations.
+ #' @param plot.clustering if TRUE (default) a 2-dimensional "clusplot" plot will be printed + #' + #' @return + #' vector with the new assigned clusters + #' @export + #' + #' @examples + #' # cluster_manhattan(subx, k.gap=4, plot.clustering=TRUE) + #' + cluster_manhattan <- function(x, k.gap, plot.clustering) { + pam_3_man <- pam(x, k.gap, diss = FALSE, metric = "manhattan") + if (plot.clustering) { + clusplot(x, pam_3_man$cluster, color = TRUE, main = c("k-medoids,manhattan", + paste(k.gap), "clusters")) + } + clusters <- pam_3_man$cluster + + return(unlist(clusters)) + } + """ + ) + + +def rdef_hclust_euc(): + robjects.r( + """ + ### Hierarchical clustering euclidean ### + #' Title Deciding on number of clusters by using Hierarchical clustering euclidean + #' + #' @param x data matrix + #' @param y Dependent variable + #' @param k.gap positive integer specifying the number of clusters, less than the number of observation. Default value is 10. + #' @param plot.clustering if TRUE (default) a 2-dimensional "clusplot" plot will be printed + #' + #' + #' @return + #' summary table of the distribution to clusters + #' @export + #' + #' @examples + #' hclust_euc(subx, k.gap = 5, plot.clustering=TRUE) + #' + hclust_euc <- function(x, k.gap, plot.clustering) { + d <- dist(x, method = "euclidean") + fit_best <- hclust(d, method = "ward.D") + if (plot.clustering) { + plot(fit_best, main = c("hclust , euclidean,", paste(k.gap), " clusters")) + } + groups_best_4 <- cutree(fit_best, k = k.gap) + rect.hclust(fit_best, k = k.gap, border = "blue") + clusters <- groups_best_4 + return(unlist(clusters)) + } + """ + ) + + +# ================================= +# Hierarchical clustering manhattan +# ================================= +def rdef_hclust_man(): + robjects.r( + """ + #' Title Deciding on number of clusters by Hierarchical clustering manhattan + #' + #' @param x data matrix + #' @param plot.clustering if TRUE (default) a 2-dimensional 'clusplot' plot will be printed + #' + #' @return + #' a list of two variables the hclust function description and a summary table + #' of the distribution to clusters + #' @export + #' + #' @examples + #' hclust_man(subx, k.gap = 5, plot.clustering=TRUE) + #' + hclust_man <- function(x, k.gap, plot.clustering) { + + d_man <- dist(x, method = "manhattan") + fit_best_man <- hclust(d_man, method = "ward.D") + if (plot.clustering) { + plot(fit_best_man, main = c("hclust, manhattan,", paste(k.gap), + "7 clusters")) + } + groups_best_4_man <- cutree(fit_best_man, k = k.gap) + rect.hclust(fit_best_man, k = k.gap, border = "red") + clusters <- groups_best_4_man + + return(unlist(clusters)) + } + """ + ) + + +# ============= +# 3 C functions +# ============= +def rdef_C2(): + robjects.r( + """ + #' Title C2 + #' + #' @param x data matrix + #' @param y Dependent variable + #' @param feature_selection_method method for the feature selection of the clinical measurements stage. Default RF. + #' @param num_clusters_method method for the choosing number of clusters by using the clinical measurements. Default Euclidean. + #' @param k number of clusters to use. If missing, we use a detection method. Defaukt as NULL + #' @param clustering_method method for clustering using the reduced clinical measures. Default is Hmanhattan, + #' + #' @return a list of three variables: + #' 1) vector with the names of the omportant variables chosen. 
+    #' 2) number of classes that will be used for clustering
+    #' 3) vector of the new assigned clusters
+    #'
+    #' @export
+    #'
+    #' @examples
+    #' resultC2 <- C2(x, y, feature_selection_method='RF', num_clusters_method='Manhattan', clustering_method='Manhattan', plot.num.clus=TRUE, plot.clustering=TRUE)
+    #' C2(x, y, feature_selection_method='BIC', num_clusters_method='Manhattan', clustering_method='Hmanhattan', plot.num.clus=TRUE, plot.clustering=FALSE, nbest=1, nvmax=8, B=50)
+    C2 <- function(x, y, feature_selection_method, num_clusters_method, k = NULL,
+        clustering_method, ...) {
+        # Feature selection
+        imp_var <- feature_selection(x, y, method = feature_selection_method)
+        # print(imp_var)
+        # CM_final_vars <- imp_var[[1]][2]  # Extracting a list of important CM variables
+        subx <- x[, unlist(imp_var)]
+        # Deciding on number of clusters
+        if (missing(k)) {
+            num_clust <- number_of_clusters(x = subx, method = num_clusters_method)
+            print(num_clust)
+            # library(car)
+            user_choise <- function() {
+                k <- readline(prompt = paste("Enter the chosen number of clusters",
+                    ":\n"))
+                k <- as.numeric(k)
+                return(k)
+            }
+            num_clust <- user_choise()
+
+        } else {
+            num_clust <- k
+        }
+        # Final clustering
+        final_cluster <- clustering(subx, k.gap = num_clust)
+        # print(final_cluster)
+        return(list(imp_var, num_clust, final_cluster))
+    }
+    """
+    )
+
+
+def rdef_get_PBx_from_DATA_C3():
+    robjects.r(
+        """
+    #' Title get_PBx_from_DATA_C3
+    #'
+    #' @param DATA Full data matrix, includes all observations for all the variables
+    #' @param META_DATA Need to have at least 2 columns, one with all variables' names, another one which indicates
+    #' the type of each variable (CM, DX, PB)
+    #'
+    #' @return the subset of DATA containing only the PB (potential biomarker) variables
+    #'
+    #' @export
+    #'
+    #' @examples
+    #' # PBx <- get_PBx_from_DATA_C3(DATA, META_DATA)
+    #'
+    get_PBx_from_DATA_C3 <- function(DATA, META_DATA) {
+        x <- DATA[, META_DATA$varName[META_DATA$varCategory == "PB"]]
+        return(PBx = x)
+    }
+    """
+    )
+
+
+def rdef_C3():
+    robjects.r(
+        """
+    #' Title C3
+    #'
+    #' @param PBx data matrix
+    #' @param newy new assigned clusters, results from C2.
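+    #' (i.e. the cluster labels produced by the C2 step and passed in here as
+    #' C2_results[[3]] by the Python driver above)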
+ #' @param feature_selection_method method for the feature selection of the Potential Bio-Markers + #' @param classification_method method for classification using the potential bio-markers + #' + #' @return a list of two variables: + #' 1) vector with the names of important variables chosen + #' 2) classification result for each observation + #' @export + #' + #' @examples + #' C3(PBx, newy, feature_selection_method='RF', classification_method='RF') + #' + C3 <- function(PBx, newy, feature_selection_method, classification_method) { + # Feature selection if(!factor(newy)){ newy <- as.factor(newy) } + imp_var <- feature_selection(PBx, newy, method = feature_selection_method) + sub_PBx <- PBx[, imp_var] + # Classification + classification <- classification_fun(PBx, newy, method = classification_method) + return(list(imp_var, unname(classification))) + } + """ + ) + + +def rdef_classification_fun(): + robjects.r( + """ + ####################################### Potential biomarkers classification # + #' Title Classification for the potential Biomarkers + #' + #' @param PBx data matrix + #' @param newy New assigned clusters + #' @param method Classification method for the function to use + #' + #' @return Predicted values for each observation + #' + #' @export + #' + #' @examples + #' # classification_fun(PBx, newy, method='RF') + classification_fun <- function(PBx, newy, method = "RF") { + + if (method == "RF") { + output <- RF_classify(PBx, newy) + } + if (method == "RF_downsampling") { + output <- RF_one_by_one(PBx, newy) + } + if (method == "CART_information") { + output <- cart_function(PBx, newy, criteria = "information") + } + if (method == "CART_gini") { + output <- cart_function(PBx, newy, criteria = "gini") + } + return(output) + } + """ + ) + + +def rdef_RF_classify(): + robjects.r( + """ + ### Random Forest Without Down Sampling ### + #' Title Classification Using Random Forest Without Down Sampling + #' + #' @param PBx data matrix + #' @param newy New assigned clusters + #' + #' @return The predicted values for each observation + #' + #' @export + #' + #' @examples + #' # RF_classify(PBx, newy) + library(randomForest) + RF_classify <- function(PBx, newy) { + if (!is.factor(newy)) { + warning("y is not a factor - but was coerced into one.") + newy <- as.factor(newy) + } + fulldata <- data.frame(PBx, newy) + rf_clus_PB <- randomForest(newy ~ ., data = fulldata, ntree = 50) + model <<- rf_clus_PB + return(rf_clus_PB$predicted) + } + """ + ) + + +def rdef_RF_one_by_one(): + robjects.r( + """ + ### Random forest with down sampling ### + #' Title Classification Using Random Forest Without Down Sampling + #' + #' @param PBx data matrix + #' @param newy New assigned clusters + #' + #' @return a list of two variables: the hclust function description and a summary table + #' of the distribution to clusters + #' @export + #' + #' @examples + #' # RF_one_by_one(PBx, newy) + RF_one_by_one <- function(PBx, newy) { + if (!is.factor(newy)) { + warning("y is not a factor - but was coerced into one.") + newy <- as.numeric(as.factor(newy)) + } + rflist_names <- paste("cluster", c(1:length(unique(newy)))) + rflist <- sapply(rflist_names, function(x) NULL) + for (i in 1:length(unique(newy))) { + class_2 <- ifelse(newy == i, 1, 0) + nmin <- sum(class_2 == 1) + rflist[[i]] <- randomForest(factor(class_2) ~ ., data = PBx, ntree = 1000, + importance = TRUE, proximity = TRUE, sampsize = rep(nmin, 2)) + } + return(rflist) + } + """ + ) + + +def rdef_cart_function(): + robjects.r( + """ + #' # 
cart_function(PBx, newy, 'information')
+    ### CART ###
+    #' Title Classification Using CART
+    #'
+    #' @param PBx data matrix
+    #' @param newy New assigned clusters
+    #' @param criteria splitting criterion, either "gini" or "information"
+    #'
+    #' @return the predicted class for each observation
+    #' @export
+    #'
+    #' @examples
+    #' # cart_function(PBx, newy, criteria = "gini")
+    cart_function <- function(PBx, newy, criteria = "gini") {
+        library(rpart)  # rpart provides the CART implementation used below
+
+        fulldata <- data.frame(PBx, newy)
+        cart <- rpart(newy ~ ., data = fulldata, method = "class", parms = list(split = criteria))
+        model <<- cart
+        pred <- predict(cart, type = "class")
+        return(pred)
+    }
+    """
+    )
+
+
+if __name__ == "__main__":
+    import time
+    from mipframework import create_runner
+
+    algorithm_args = [
+        "-y",
+        "lefthippocampus, righthippocampus, leftcaudate",
+        "-x",
+        "gender, agegroup",
+        "-pathology",
+        "dementia",
+        "-dataset",
+        "edsd, ppmi",
+        "-filter",
+        "",
+        "-dx",
+        "alzheimerbroadcategory",
+        "-c2_feature_selection_method",
+        "RF",
+        "-c2_num_clusters_method",
+        "Euclidean",
+        "-c2_num_clusters",
+        "6",
+        "-c2_clustering_method",
+        "Euclidean",
+        "-c3_feature_selection_method",
+        "RF",
+        "-c3_classification_method",
+        "RF",
+    ]
+    runner = create_runner(ThreeC, algorithm_args=algorithm_args, num_workers=1)
+    start = time.time()
+    runner.run()
+    end = time.time()
+    print("Completed in", end - start)
diff --git a/Exareme-Docker/src/mip-algorithms/TTEST_PAIRED/local.template.sql b/Exareme-Docker/src/mip-algorithms/TTEST_PAIRED/local.template.sql
index 174c344d768601914a15e2ee3278cd213916f7d5..b4b18c67e5b5cd267c2bf0f4c8e39018b84d48ff 100644
--- a/Exareme-Docker/src/mip-algorithms/TTEST_PAIRED/local.template.sql
+++ b/Exareme-Docker/src/mip-algorithms/TTEST_PAIRED/local.template.sql
@@ -12,6 +12,16 @@ var 'xnames' from select group_concat(xname) as xname from
 (select distinct xname from (select strsplitv(regexpr("\-",'%{x}',"+") ,'delimiter:+') as xname) where xname!=0);
+
+--Check input: the number of variables must be even (paired variables come in pairs)
+select pairedttest_inputerrorchecking(no) from
+(select count(*) as no from
+(select strsplitv(xname,'dialect:csv') from
+(select group_concat(xname) as xname from
+(select distinct xname from (select strsplitv(regexpr("\-",'%{x}',"+") ,'delimiter:+') as xname) where xname!=0))));
+
+
+
 --Read dataset and Cast values of columns using cast function.
 var 'cast_x' from select create_complex_query("","tonumber(?)
as ?", "," , "" , '%{xnames}'); drop table if exists localinputtblflat; diff --git a/Exareme-Docker/src/mip-algorithms/mipframework/algorithm.py b/Exareme-Docker/src/mip-algorithms/mipframework/algorithm.py index d5f91e9285559319d38b14d517ef92c7232090b0..ba6ca5f260a99c76ad2bf37ea1d17afa00fe1cc7 100644 --- a/Exareme-Docker/src/mip-algorithms/mipframework/algorithm.py +++ b/Exareme-Docker/src/mip-algorithms/mipframework/algorithm.py @@ -2,18 +2,24 @@ import json import logging import os import re -from string import capwords import warnings -from .loggingutils import logged -from .decorators import algorithm_methods_decorator -from .parameters import Parameters, parse_exareme_args -from .transfer import AddMe, MaxMe, MinMe, ConcatMe, DoNothing, TransferStruct -from .helpers import one_kwarg +from mipframework.loggingutils import logged +from mipframework.decorators import algorithm_methods_decorator +from mipframework.parameters import Parameters, parse_exareme_args +from mipframework.transfer import ( + AddMe, + MaxMe, + MinMe, + ConcatMe, + DoNothing, + TransferStruct, +) +from mipframework.helpers import one_kwarg _MAIN_METHODS = re.compile( r"""^((local_|global_) - (init|step(_[2-9])?|final|)? + (init|step(_[2-9])?|final|pure|)? |termination_condition)$""", re.VERBOSE, ) @@ -73,7 +79,7 @@ class Algorithm(object): def set_output(self): try: res = json.dumps(self.result.output()) - logging.debug("Algorithm output:\n {res}".format(res=res, indent=4)) + logging.debug("Algorithm output:\n {res}".format(res=res)) print(json.dumps(self.result.output(), allow_nan=True, indent=4)) except ValueError: logging.error("Result contains NaNs.") diff --git a/Exareme-Docker/src/mip-algorithms/mipframework/algorithmtest.py b/Exareme-Docker/src/mip-algorithms/mipframework/algorithmtest.py index 066cff68fbcf43bf112ae8c118613a0f8f5a77ed..1368a9d9c8f2fe726d1bb93b32f723f0f58637e2 100644 --- a/Exareme-Docker/src/mip-algorithms/mipframework/algorithmtest.py +++ b/Exareme-Docker/src/mip-algorithms/mipframework/algorithmtest.py @@ -31,9 +31,9 @@ class AlgorithmTest(object): __metaclass__ = abc.ABCMeta """ A base class for generating random test-cases for algorithm testing. - The test-cases are generated based on specifications gathered from - the algorithm's properties.json file, uniformly at random whenever - possible. The class must be subclassed for each algorithm and the + The test-cases are generated based on specifications gathered from + the algorithm's properties.json file, uniformly at random whenever + possible. The class must be subclassed for each algorithm and the `get_expected` method must be implemented by the subclass using some standard library for computing the expected results. 
""" diff --git a/Exareme-Docker/src/mip-algorithms/mipframework/data.py b/Exareme-Docker/src/mip-algorithms/mipframework/data.py index 43bf794e631eed5fc031f52222afcaf637a06f21..e1ff542a7f53e097a8178c5f8fc6343055cba301 100644 --- a/Exareme-Docker/src/mip-algorithms/mipframework/data.py +++ b/Exareme-Docker/src/mip-algorithms/mipframework/data.py @@ -2,12 +2,12 @@ import re import numpy as np import pandas as pd -from mipframework.constants import PRIVACY_THRESHOLD -from patsy import dmatrix, dmatrices +import patsy from sqlalchemy import between, not_, and_, or_, Table, select, create_engine, MetaData -from .loggingutils import log_this, repr_with_logging, logged -from .exceptions import PrivacyError +from mipframework.constants import PRIVACY_THRESHOLD +from mipframework.loggingutils import log_this, repr_with_logging, logged +from mipframework.exceptions import PrivacyError FILTER_OPERATORS = { "equal": lambda a, b: a == b, @@ -27,6 +27,7 @@ FILTER_CONDITIONS = { class AlgorithmData(object): def __init__(self, args): + log_this("AlgorithmData.__init__", args=args) db = DataBase( db_path=args.input_local_DB, data_table_name=args.data_table, @@ -37,66 +38,88 @@ class AlgorithmData(object): self.db = db self.full = db.read_data_from_db(args) self.metadata = db.read_metadata_from_db(args) - self.variables, self.covariables = self.build_variables( - args, self.metadata.is_categorical - ) + variables, covariables = self.build_variables(args) + if 1 in self.metadata.is_categorical.values(): + variables = self.add_missing_levels(args.y, args.coding, variables) + if covariables is not None: # truth value of dataframe is ambiguous + covariables = self.add_missing_levels(args.x, args.coding, covariables) + self.variables, self.covariables = variables, covariables def __repr__(self): repr_with_logging(self, variables=self.variables, covariables=self.covariables) - def build_variables(self, args, is_categorical): - log_this( - "AlgorithmData.build_variables", args=args, is_categorical=is_categorical - ) + def build_variables(self, args): + log_this("AlgorithmData.build_variables", args=args) from numpy import log as log from numpy import exp as exp # This line is needed to prevent import optimizer from removing above lines _ = log(exp(1)) - formula = get_formula(args, is_categorical) - # Create variables (and possibly covariables) + formula = self.get_formula(args) if args.formula_is_equation: if self.full.dropna().shape[0] == 0: return pd.DataFrame(), pd.DataFrame() - variables, covariables = dmatrices( + variables, covariables = patsy.dmatrices( formula, self.full, return_type="dataframe" ) - return variables, covariables else: if self.full.dropna().shape[0] == 0: return pd.DataFrame(), None - variables = dmatrix(formula, self.full, return_type="dataframe") - return variables, None - + variables = patsy.dmatrix(formula, self.full, return_type="dataframe") + covariables = None + return variables, covariables -@logged -def get_formula(args, is_categorical): - # Get formula from args or build if doesn't exist - if hasattr(args, "formula") and args.formula: - formula = args.formula - else: - if hasattr(args, "x") and args.x: - formula = "+".join(args.y) + "~" + "+".join(args.x) - if not args.intercept: - formula += "-1" + def add_missing_levels(self, varnames, coding, dmatrix): + log_this( + "AlgorithmData.add_missing_levels", + varnames=varnames, + coding=coding, + dmatrix=dmatrix.columns, + ) + categorical_variables = ( + var for var in varnames if self.metadata.is_categorical[var] + ) + 
all_var_levels = ( + "C({var}, {coding})[{level}]".format(var=var, coding=coding, level=level) + for var in categorical_variables + for level in self.metadata.enumerations[var] + ) + for var_level in all_var_levels: + if var_level in dmatrix.columns: + continue + missing_column = pd.Series(np.zeros((len(dmatrix),)), index=dmatrix.index) + dmatrix[var_level] = missing_column + return dmatrix + + def get_formula(self, args): + log_this("AlgorithmData.add_missing_levels", args=args) + is_categorical = self.metadata.is_categorical + # Get formula from args or build if doesn't exist + if hasattr(args, "formula") and args.formula: + formula = args.formula else: - formula = "+".join(args.y) + "-1" - # Process categorical vars - var_names = list(args.y) - if hasattr(args, "x") and args.x: - var_names.extend(args.x) - if 1 in is_categorical.values(): - if not hasattr(args, "coding") or not args.coding: - args.coding = "Treatment" - for var in var_names: - if is_categorical[var]: - formula = re.sub( - r"\b({})\b".format(var), - r"C(\g<0>, {})".format(args.coding), - formula, - ) - return formula + if hasattr(args, "x") and args.x: + formula = "+".join(args.y) + "~" + "+".join(args.x) + if not args.intercept: + formula += "-1" + else: + formula = "+".join(args.y) + "-1" + # Process categorical vars + var_names = list(args.y) + if hasattr(args, "x") and args.x: + var_names.extend(args.x) + if 1 in is_categorical.values(): + if not hasattr(args, "coding") or not args.coding: + args.coding = "Treatment" + for var in var_names: + if is_categorical[var]: + formula = re.sub( + r"\b({})\b".format(var), + r"C(\g<0>, {})".format(args.coding), + formula, + ) + return formula class AlgorithmMetadata(object): diff --git a/Exareme-Docker/src/mip-algorithms/mipframework/decorators.py b/Exareme-Docker/src/mip-algorithms/mipframework/decorators.py index a2adc0de49d45d4f3b4cdb9a63e1ca969a3e180d..35d4d7ee8b73423868831e678f4bbd5f33a0cd44 100644 --- a/Exareme-Docker/src/mip-algorithms/mipframework/decorators.py +++ b/Exareme-Docker/src/mip-algorithms/mipframework/decorators.py @@ -33,6 +33,9 @@ def algorithm_methods_decorator(func): wrapper = make_wrapper(node="global", step="last", state="load")(func) elif func_name == "termination_condition": wrapper = make_termination_wrapper(func) + # Purely local algorithms + elif func_name == "local_pure": + wrapper = make_pure_local_wrapper(func) # Error else: logging.error("Unknown function name.") @@ -106,3 +109,15 @@ def make_termination_wrapper(func): func(self) return wrapper + + +def make_pure_local_wrapper(func): + @wraps(func) + def wrapper(self): + self.data = AlgorithmData(self._args) + self.metadata = self.data.metadata + del self.data.metadata + func(self) + self.set_output() + + return wrapper diff --git a/Exareme-Docker/src/mip-algorithms/mipframework/funclib/__init__.py b/Exareme-Docker/src/mip-algorithms/mipframework/funclib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Exareme-Docker/src/mip-algorithms/mipframework/funclib/crossvalidation.py b/Exareme-Docker/src/mip-algorithms/mipframework/funclib/crossvalidation.py new file mode 100644 index 0000000000000000000000000000000000000000..9a90d838668c2b459d041314cdb971458020e613 --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/mipframework/funclib/crossvalidation.py @@ -0,0 +1,261 @@ +from __future__ import print_function +from __future__ import division +from __future__ import unicode_literals + +from collections import 
namedtuple + +import numpy as np +from sklearn.model_selection import KFold + + +def kfold_split_design_matrices(n_splits, *matrices): + train_sets = [] + test_sets = [] + kf = KFold(n_splits=n_splits, random_state=0) + for train_idx, test_idx in kf.split(matrices[0]): + train_sets.append([m[train_idx] for m in matrices]) + test_sets.append([m[test_idx] for m in matrices]) + return train_sets, test_sets + + +def kfold_split_design_matrix(X, n_splits): + kf = KFold(n_splits=n_splits) + train_sets = [] + test_sets = [] + for train_idx, test_idx in kf.split(X): + train_sets.append(X[train_idx]) + test_sets.append(X[test_idx]) + return train_sets, test_sets + + +def compute_classification_results(y, yhats): + true_positives = np.array( + [sum(1 if yi == yhi == 1 else 0 for yi, yhi in zip(y, yhat)) for yhat in yhats] + ) + true_negatives = np.array( + [sum(1 if yi == yhi == 0 else 0 for yi, yhi in zip(y, yhat)) for yhat in yhats] + ) + false_positives = np.array( + [ + sum(1 if yi == 0 and yhi == 1 else 0 for yi, yhi in zip(y, yhat)) + for yhat in yhats + ] + ) + false_negatives = np.array( + [ + sum(1 if yi == 1 and yhi == 0 else 0 for yi, yhi in zip(y, yhat)) + for yhat in yhats + ] + ) + return false_negatives, false_positives, true_negatives, true_positives + + +ConfusionMatrixSummary = namedtuple( + "ConfusionMatrixSummary", "accuracy precision recall confusion_mat f1" +) + + +def compute_confusion_matrix(tp, tn, fp, fn): + confusion_mat = { + "True Positives": tp, + "True Negatives": tn, + "False Positives": fp, + "False Negatives": fn, + } + accuracy = (tp + tn) / (tp + tn + fp + fn) + try: + precision = tp / (tp + fp) + except ZeroDivisionError: + precision = 1 + try: + recall = tp / (tp + fn) + except ZeroDivisionError: + recall = 1 + try: + f1 = 2 * (precision * recall) / (precision + recall) + except ZeroDivisionError: + f1 = 2 + return ConfusionMatrixSummary(accuracy, precision, recall, confusion_mat, f1) + + +def compute_roc(true_positives, true_negatives, false_positives, false_negatives): + fp_rate = [ + fp / (fp + tn) if fp != 0 or tn != 0 else 1 + for fp, tn in zip(false_positives, true_negatives) + ] + tp_rate = [ + tp / (tp + fn) if tp != 0 or fn != 0 else 1 + for tp, fn in zip(true_positives, false_negatives) + ] + roc_curve = list(zip(fp_rate, tp_rate)) + auc = np.trapz(tp_rate, fp_rate) + gini = 2 * auc - 1 + return roc_curve, auc, gini + + +class AdditiveMulticlassROCCurve(object): + def __init__( + self, + y_true=None, + y_pred_proba_per_class=None, + classes=None, + tp=None, + tn=None, + fp=None, + fn=None, + ): + if (tp, tn, fp, fn) == (None, None, None, None): + if len(y_true.shape) > 1: + y_true = y_true.flatten() + self.tp = [] + self.tn = [] + self.fp = [] + self.fn = [] + self.classes = classes + for ci, c in enumerate(classes): + y_pred_proba = y_pred_proba_per_class[:, ci] + thres = np.linspace(1.0, 0.0, num=2 ** 7 + 1) + self.tp.append( + ((y_true == c) & (y_pred_proba >= thres[:, None])).sum(axis=1) + ) + self.tn.append( + ((y_true != c) & (y_pred_proba < thres[:, None])).sum(axis=1) + ) + self.fp.append( + ((y_true != c) & (y_pred_proba >= thres[:, None])).sum(axis=1) + ) + self.fn.append( + ((y_true == c) & (y_pred_proba < thres[:, None])).sum(axis=1) + ) + elif tp and tn and fp and fn: + self.tp = tp + self.tn = tn + self.fp = fp + self.fn = fn + + def __add__(self, other): + result = AdditiveMulticlassROCCurve( + tp=[tp_1 + tp_2 for tp_1, tp_2 in zip(self.tp, other.tp)], + tn=[tn_1 + tn_2 for tn_1, tn_2 in zip(self.tn, other.tn)], + fp=[fp_1 + fp_2 for 
fp_1, fp_2 in zip(self.fp, other.fp)], + fn=[fn_1 + fn_2 for fn_1, fn_2 in zip(self.fn, other.fn)], + ) + if (self.classes == other.classes).all(): + result.classes = self.classes + else: + raise ValueError("classes do not agree") + return result + + def get_curves(self): + curves = [] + for ci, c in enumerate(self.classes): + tpr = self.tp[ci] / (self.tp[ci] + self.fn[ci]) + tpr[np.isnan(tpr)] = 1.0 + fpr = self.fp[ci] / (self.fp[ci] + self.tn[ci]) + fpr[np.isnan(fpr)] = 1.0 + curves.append((fpr.tolist(), tpr.tolist())) + return curves + + +class AdditiveMulticlassClassificationReport(object): + def __init__( + self, + y_true=None, + y_pred=None, + classes=None, + class_count=None, + tp=None, + tn=None, + fp=None, + fn=None, + ): + if tp is not None and tn is not None and fp is not None and fn is not None: + self.tp = tp + self.tn = tn + self.fp = fp + self.fn = fn + self.classes = classes + self.class_count = class_count + + elif (tp, tn, fp, fn) == (None, None, None, None): + if len(y_true.shape) > 1: + y_true = y_true.flatten() + self.classes = classes[:, np.newaxis] + _, self.class_count = np.unique(y_true, return_counts=True) + self.tp = ((y_true == self.classes) & (y_pred == self.classes)).sum(axis=1) + self.tn = ((y_true != self.classes) & (y_pred != self.classes)).sum(axis=1) + self.fp = ((y_true != self.classes) & (y_pred == self.classes)).sum(axis=1) + self.fn = ((y_true == self.classes) & (y_pred != self.classes)).sum(axis=1) + + def __add__(self, other): + return AdditiveMulticlassClassificationReport( + tp=self.tp + other.tp, + tn=self.tn + other.tn, + fp=self.fp + other.fp, + fn=self.fn + other.fn, + classes=self.classes, + class_count=self.class_count + other.class_count, + ) + + def get_values(self): + tp_sum = sum(self.tp) + tn_sum = sum(self.tn) + fp_sum = sum(self.fp) + fn_sum = sum(self.fn) + + precision = self.tp / (self.tp + self.fp) + precision[np.isnan(precision)] = 1.0 + precision_micro_avg = tp_sum / (tp_sum + fp_sum) + precision_micro_avg = ( + 1.0 if np.isnan(precision_micro_avg) else precision_micro_avg + ) + precicion_avgs = [ + precision_micro_avg, + precision.mean(), + np.average(precision, weights=self.class_count), + ] + + recall = self.tp / (self.tp + self.fn) + recall[np.isnan(recall)] = 1.0 + recall_micro_avg = tp_sum / (tp_sum + fn_sum) + recall_micro_avg = 1.0 if np.isnan(recall_micro_avg) else recall_micro_avg + recall_avgs = [ + recall_micro_avg, + recall.mean(), + np.average(recall, weights=self.class_count), + ] + + specificity = self.tn / (self.tn + self.fp) + specificity[np.isnan(specificity)] = 1.0 + specificity_micro_avg = tn_sum / (tn_sum + fp_sum) + specificity_micro_avg = ( + 1.0 if np.isnan(specificity_micro_avg) else specificity_micro_avg + ) + specificity_avgs = [ + specificity_micro_avg, + specificity.mean(), + np.average(specificity, weights=self.class_count), + ] + + f_score = 2.0 * (precision * recall) / (precision + recall) + f_score_micro_avg = ( + 2.0 + * (precision_micro_avg * recall_micro_avg) + / (precision_micro_avg + recall_micro_avg) + ) + f_score_avgs = [ + f_score_micro_avg, + f_score.mean(), + np.average(f_score, weights=self.class_count), + ] + + return ( + precision, + recall, + specificity, + f_score, + precicion_avgs, + recall_avgs, + specificity_avgs, + f_score_avgs, + ) diff --git a/Exareme-Docker/src/mip-algorithms/mipframework/highchart_server/app.py b/Exareme-Docker/src/mip-algorithms/mipframework/highchart_server/app.py index 8926da2d6958bfe0f7aed9efffe698c33779eab5..ea5a7c1c6d3c6be156fc5a5887debec27436cfa7 
100755 --- a/Exareme-Docker/src/mip-algorithms/mipframework/highchart_server/app.py +++ b/Exareme-Docker/src/mip-algorithms/mipframework/highchart_server/app.py @@ -9,6 +9,8 @@ from PEARSON_CORRELATION import Pearson from LOGISTIC_REGRESSION import LogisticRegression from CALIBRATION_BELT import CalibrationBelt from KAPLAN_MEIER import KaplanMeier +from ANOVA_ONEWAY import Anova +from NAIVE_BAYES import NaiveBayes app = Flask(__name__) @@ -36,6 +38,12 @@ charts_info = { "title": "Kaplan-Meier Survival Curves", "url": "kaplan_meier_survival", }, + "anova_errorbars": {"title": "Anova Mean Plot", "url": "anova_errorbars"}, + "naive_bayes_confusion_matrix": { + "title": "NaiveBayes CM", + "url": "naive_bayes_confusion_matrix", + }, + "naive_bayes_roc": {"title": "NaiveBayes ROC", "url": "naive_bayes_roc",}, } @@ -47,6 +55,27 @@ def home(): ) +@app.route("/anova_errorbars") +def anova_errorbars(): + anova_args = [ + "-y", + "lefthippocampus", + "-x", + "alzheimerbroadcategory", + "-pathology", + "dementia", + "-dataset", + "adni", + "-filter", + "", + ] + result = get_algorithm_result(Anova, anova_args) + result = result["result"][3]["data"] + return render_template( + "highchart_layout.html", title="Anova Mean Plot", data=result + ) + + @app.route("/pca_scree_eigenvalues") def pca_scree_eigenvalues(): pca_args = [ @@ -293,5 +322,41 @@ def kaplan_meier_survival(): return render_template("highchart_layout.html", title="Kaplan Meier", data=result,) +nb_args = [ + "-x", + # "lefthippocampus,righthippocampus,leftaccumbensarea", + # "gender,alzheimerbroadcategory,agegroup", + "lefthippocampus,righthippocampus,leftaccumbensarea,apoe4,alzheimerbroadcategory", + "-y", + "agegroup", + "-alpha", + "1", + "-k", + "10", + "-pathology", + "dementia", + "-dataset", + "adni, ppmi", + "-filter", + "", +] + + +@app.route("/naive_bayes_confusion_matrix") +def naive_bayes_confusion_matrix(): + result = get_algorithm_result(NaiveBayes, nb_args) + result = result["result"][4]["data"] + return render_template( + "highchart_layout.html", title="NaiveBayes Confusion Martix", data=result + ) + + +@app.route("/naive_bayes_roc") +def naive_bayes_roc(): + result = get_algorithm_result(NaiveBayes, nb_args) + result = result["result"][5]["data"] + return render_template("highchart_layout.html", title="NaiveBayes ROC", data=result) + + if __name__ == "__main__": app.run(debug=True) diff --git a/Exareme-Docker/src/mip-algorithms/mipframework/highcharts/__init__.py b/Exareme-Docker/src/mip-algorithms/mipframework/highcharts/__init__.py index 7a49ef348c00d8c1a195374fed256369a5a85418..8b4ef7ffa25ec76d8be6024358f024f95a0ea718 100644 --- a/Exareme-Docker/src/mip-algorithms/mipframework/highcharts/__init__.py +++ b/Exareme-Docker/src/mip-algorithms/mipframework/highcharts/__init__.py @@ -5,6 +5,9 @@ from .user_defined import ( ScreePlot, CalibrationBeltPlot, SurvivalCurves, + LineWithErrorbars, + MultilabelConfisionMatrix, + MulticlassROCCurve, ) __all__ = [ @@ -14,4 +17,7 @@ __all__ = [ "ScreePlot", "CalibrationBeltPlot", "SurvivalCurves", + "LineWithErrorbars", + "MultilabelConfisionMatrix", + "MulticlassROCCurve", ] diff --git a/Exareme-Docker/src/mip-algorithms/mipframework/highcharts/user_defined.py b/Exareme-Docker/src/mip-algorithms/mipframework/highcharts/user_defined.py index 3a4c52cde0c2b2299cd9e654cc5796abdaea4fe9..dc256505d8a4ca45f1561a74b813329dab1088af 100644 --- a/Exareme-Docker/src/mip-algorithms/mipframework/highcharts/user_defined.py +++ b/Exareme-Docker/src/mip-algorithms/mipframework/highcharts/user_defined.py 
@@ -1,7 +1,7 @@ from colour import Color -from .core import Heatmap_, Area_, Column_, Bubble_, Line_ +from .core import Heatmap_, Area_, Column_, Bubble_, Line_, Errorbar_ from .core import ( Title, Axis, @@ -132,6 +132,46 @@ class ConfusionMatrix(HighchartTemplate): ) +class MultilabelConfisionMatrix(HighchartTemplate): + def __init__(self, title, confusion_matrix, classes): + min_val = 0 + max_val = confusion_matrix.max() + data = [ + { + "name": str(confusion_matrix[i, j]), + "x": i, + "y": j, + # "y": confusion_matrix.shape[1] - j - 1, + "value": confusion_matrix[i, j], + } + for i in range(confusion_matrix.shape[0]) + for j in range(confusion_matrix.shape[1]) + ] + data_labels = DataLabels( + format="{point.name}", + enabled=True, + color="#222222", + borderRadius=3, + backgroundColor="rgba(245, 255, 255, 0.5)", + borderWidth=2, + borderColor="#AAA", + padding=5, + ) + self.chart = ( + Heatmap_(title=Title(text=title)) + .set(xAxis=Axis(categories=classes)) + .set(yAxis=Axis(categories=list(classes), title=None,)) + .set( + colorAxis=ColorAxis( + min=min_val, max=max_val, minColor="#ffffff", maxColor="#0000ff" + ) + ) + .set(series=Series(data=data, borderWidth=1, dataLabels=data_labels)) + .set(legend=Legend(enabled=False)) + .set(tooltip=Tooltip(enabled=False)) + ) + + class ROC(HighchartTemplate): def __init__(self, title, roc_curve, auc, gini): self.chart = ( @@ -154,6 +194,40 @@ class ROC(HighchartTemplate): ) +class MulticlassROCCurve(HighchartTemplate): + def __init__(self, title, roc_curves, classes): + self.chart = ( + Line_(title=Title(text=title)) + .set( + xAxis=Axis(min=-0.05, max=1.05, title=Title(text="False Positive Rate")) + ) + .set( + yAxis=Axis(min=-0.05, max=1.05, title=Title(text="True Positive Rate")) + ) + .set(legend=Legend(enabled=True)) + ) + series = RenderableList( + [ + Series(data=map(list, zip(*curve)), name=class_) + for class_, curve in zip(classes, roc_curves) + ] + ) + series.append( + Series( + name="Bisector", + data=[[0, 0], [1, 1]], + zIndex=2, + color="#fc7938", + lineWidth=1.5, + dashStyle="Dash", + allowPointSelect=False, + marker={"enabled": False}, + label={"enabled": False}, + ) + ) + self.chart.set(series=series) + + class ScreePlot(HighchartTemplate): def __init__(self, title, data, xtitle): self.chart = ( @@ -422,6 +496,34 @@ class SurvivalCurves: } +class LineWithErrorbars(HighchartTemplate): + def __init__(self, title, data, categories, xname, yname): + self.chart = ( + Errorbar_(title=Title(text=title)) + .set(xAxis=Axis(categories=categories, title=Title(text=xname))) + .set(yAxis=Axis(title=Title(text=yname))) + .set( + series=RenderableList( + [ + Series( + data=[[p[0], p[2]] for p in data], + type="errorbar", + pointWidth=200, + lineWidth=2, + ), + Series( + data=[p[1] for p in data], + type="line", + color="#0A1E6E", + name="", + ), + ] + ) + ) + .set(legend=Legend(enabled=False)) + ) + + colors_dark = [ "#7cb5ec", "#434348", diff --git a/Exareme-Docker/src/mip-algorithms/mipframework/parameters.py b/Exareme-Docker/src/mip-algorithms/mipframework/parameters.py index 57599e7dbe2ef0b658c5b1768289d97949953d20..c91d16c5b343dd7aa25090bcc84e28fc24faf4b4 100644 --- a/Exareme-Docker/src/mip-algorithms/mipframework/parameters.py +++ b/Exareme-Docker/src/mip-algorithms/mipframework/parameters.py @@ -22,6 +22,7 @@ COMMON_ALGORITHM_ARGUMENTS = { "metadata_enumerations_column", "metadata_minValue_column", "metadata_maxValue_column", + "metadata_sqlType_column", } @@ -42,16 +43,28 @@ class Parameters(object): def parse_exareme_args(fp, 
diff --git a/Exareme-Docker/src/mip-algorithms/mipframework/parameters.py b/Exareme-Docker/src/mip-algorithms/mipframework/parameters.py index 57599e7dbe2ef0b658c5b1768289d97949953d20..c91d16c5b343dd7aa25090bcc84e28fc24faf4b4 100644 --- a/Exareme-Docker/src/mip-algorithms/mipframework/parameters.py +++ b/Exareme-Docker/src/mip-algorithms/mipframework/parameters.py @@ -22,6 +22,7 @@ COMMON_ALGORITHM_ARGUMENTS = { "metadata_enumerations_column", "metadata_minValue_column", "metadata_maxValue_column", + "metadata_sqlType_column", } @@ -42,16 +43,28 @@ class Parameters(object): def parse_exareme_args(fp, cli_args): parser = ArgumentParser() # Add common arguments - for arg in COMMON_ALGORITHM_ARGUMENTS: - parser.add_argument("-" + arg) + for argname in COMMON_ALGORITHM_ARGUMENTS: + parser.add_argument("-" + argname) # Add algorithm specific arguments prop_path = os.path.join(fp, "properties.json") with open(prop_path, "r") as prop: params = json.load(prop)["parameters"] + algorithm_param_names = [] for p in params: name = "-" + p["name"] + algorithm_param_names.append(p["name"]) required = p["valueNotBlank"] parser.add_argument(name, required=required) + # Escape values starting with a dash (e.g. agegroup='-50y') so argparse does not treat them as option names + all_args = set(COMMON_ALGORITHM_ARGUMENTS) | set(algorithm_param_names) + escaped_args = [] # remember escaped args to undo later (see below) + for i, argname in enumerate(cli_args): + if argname.replace("-", "") in all_args: + continue + if argname.startswith("-"): + cli_args[i] = "\\" + argname + # the arg's name is one position before its value + escaped_args.append(cli_args[i - 1].replace("-", "")) # Parse and process args, _ = parser.parse_known_args(cli_args) args.y = re.split(r"\s*,\s*", args.y) @@ -65,4 +78,9 @@ def parse_exareme_args(fp, cli_args): args.filter = json.loads(args.filter) if args.filter else None if hasattr(args, "coding"): args.coding = None if args.coding == "null" else args.coding + # Undo arg escaping (see above) + if escaped_args: + for argname in escaped_args: + argval_without_escape = getattr(args, argname).replace("\\", "") + setattr(args, argname, argval_without_escape) return args diff --git a/Exareme-Docker/src/mip-algorithms/mipframework/runner/runner.py b/Exareme-Docker/src/mip-algorithms/mipframework/runner/runner.py index 3167cdc3e8cd218831472588321d1b88aa58d904..d1fef37a617f9b3cf6779b617e300f4209fd6d10 100644 --- a/Exareme-Docker/src/mip-algorithms/mipframework/runner/runner.py +++ b/Exareme-Docker/src/mip-algorithms/mipframework/runner/runner.py @@ -19,6 +19,13 @@ ALGORITHM_TYPES = { "CalibrationBelt": "iterative", "DescriptiveStats": "local-global", "KaplanMeier": "local-global", + "ThreeC": "local", + "Anova": "local-global", + "NaiveBayes": "multiple-local-global", + "NaiveBayesTrain": "local-global", + "GaussianNaiveBayesTrain": "local-global", + "CategoricalNaiveBayesTrain": "local-global", + "MixedNaiveBayesTrain": "local-global", } @@ -127,6 +134,11 @@ class RunnerABC(object): local-global, multiple-local-global, iterative)""" + +class LocalRunner(RunnerABC): + def run(self): + self.workers[0].local_pure() + + class LocalGlobalRunner(RunnerABC): def run(self): self.execute_runner_steps("local_") @@ -157,6 +169,11 @@ class IterativeRunner(RunnerABC): def create_runner(algorithm_class, algorithm_args, num_workers=3): alg_type = ALGORITHM_TYPES[algorithm_class.__name__] + if alg_type == "local" and num_workers > 1: + raise ValueError( + "Purely local algorithms should only have one worker. Please set" + " num_workers=1."
+ ) if alg_type == "local-global": return LocalGlobalRunner( alg_cls=algorithm_class, algorithm_args=algorithm_args, num_wrk=num_workers @@ -169,6 +186,10 @@ def create_runner(algorithm_class, algorithm_args, num_workers=3): return IterativeRunner( alg_cls=algorithm_class, algorithm_args=algorithm_args, num_wrk=num_workers ) + elif alg_type == "local": + return LocalRunner( + alg_cls=algorithm_class, algorithm_args=algorithm_args, num_wrk=num_workers + ) def split_db(num_wrk): diff --git a/Exareme-Docker/src/mip-algorithms/tests/__init__.py b/Exareme-Docker/src/mip-algorithms/tests/__init__.py index e963674afaa0f824297f1c8b215d02d53f9d92ec..8fb2ad3540579b14473f2cfccece906309191330 100644 --- a/Exareme-Docker/src/mip-algorithms/tests/__init__.py +++ b/Exareme-Docker/src/mip-algorithms/tests/__init__.py @@ -1 +1 @@ -vm_url = "http://127.0.0.1:9090/mining/query/" +vm_url = "http://88.197.53.100:9090/mining/query/" diff --git a/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/expected/anova_expected.json b/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/expected/anova_expected.json new file mode 100644 index 0000000000000000000000000000000000000000..6e4c3101b66a74faa001092d95ce9d0af29896ba --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/expected/anova_expected.json @@ -0,0 +1,6974 @@ +{ + "test_cases": [ + { + "input": [ + { + "name": "x", + "value": "alzheimerbroadcategory" + }, + { + "name": "y", + "value": "rightmprgprecentralgyrusmedialsegment" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1777.0, + "df_explained": 2.0, + "ss_residual": 198.9195428117932, + "ss_explained": 3.9913221690837792, + "ms_residual": 0.11194121711412111, + "ms_explained": 1.9956610845418896, + "p_value": 2.1585168508296565e-08, + "f_stat": 17.827759390067786, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 2.5115315765765756, + "meanB": 2.523281940298506, + "diff": -0.011750363721930501, + "se": 0.030363313807373782, + "t_stat": -0.38699213783038744, + "p_tukey": 0.9 + }, + { + "groupA": "AD", + "groupB": "Other", + "meanA": 2.5115315765765756, + "meanB": 2.623752418604646, + "diff": -0.11222084202807059, + "se": 0.02431081763388561, + "t_stat": -4.6160867033798025, + "p_tukey": 0.001 + }, + { + "groupA": "CN", + "groupB": "Other", + "meanA": 2.523281940298506, + "meanB": 2.623752418604646, + "diff": -0.10047047830614009, + "se": 0.022460348303901, + "t_stat": -4.473237767585732, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "parkinsonbroadcategory" + }, + { + "name": "y", + "value": "rightptplanumtemporale" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 711.0, + "df_explained": 2.0, + "ss_residual": 48.13362091206837, + "ss_explained": 0.77270849018639, + "ms_residual": 0.06769848229545482, + "ms_explained": 0.386354245093195, + "p_value": 0.0034767355734642716, + "f_stat": 5.706985326599179, + "tukey_test": [ + { + "groupA": "CN", + "groupB": "Other", + "meanA": 1.918540437158471, + "meanB": 1.8720436507936515, + "diff": 0.046496786364819354, + "se": 0.03012020258483027, + "t_stat": 1.5437076239399858, + "p_tukey": 0.27158201255300674 + }, + { + "groupA": "CN", + "groupB": "PD", + "meanA": 1.918540437158471, + "meanB": 
1.9586412345679025, + "diff": -0.04010079740943162, + "se": 0.02317528442397908, + "t_stat": -1.7303260092005601, + "p_tukey": 0.19476210071180244 + }, + { + "groupA": "Other", + "groupB": "PD", + "meanA": 1.8720436507936515, + "meanB": 1.9586412345679025, + "diff": -0.08659758377425097, + "se": 0.026541407054561927, + "t_stat": -3.2627352271200185, + "p_tukey": 0.003312046060991891 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3851179_a" + }, + { + "name": "y", + "value": "rightliglingualgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 625.8194563879817, + "ss_explained": 6.381830992388592, + "ms_residual": 0.7039588935747826, + "ms_explained": 3.190915496194296, + "p_value": 0.011000008906427524, + "f_stat": 4.532815090935874, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 7.847465959079275, + "meanB": 7.970860712468184, + "diff": -0.12339475338890882, + "se": 0.05993037945346409, + "t_stat": -2.0589683314907887, + "p_tukey": 0.09912523482775504 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 7.847465959079275, + "meanB": 7.720133240740741, + "diff": 0.12733271833853443, + "se": 0.09120605397066205, + "t_stat": 1.3960994122110921, + "p_tukey": 0.3441324413495922 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 7.970860712468184, + "meanB": 7.720133240740741, + "diff": 0.25072747172744325, + "se": 0.0911558111558297, + "t_stat": 2.7505374429593723, + "p_tukey": 0.016702274682755536 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs17125944_c" + }, + { + "name": "y", + "value": "leftcocentraloperculum" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 885.0, + "df_explained": 2.0, + "ss_residual": 168.37544824030303, + "ss_explained": 0.815465827513141, + "ms_residual": 0.1902547437743537, + "ms_explained": 0.4077329137565705, + "p_value": 0.11790024452391233, + "f_stat": 2.1430893425719293, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 4.005246824324322, + "meanB": 4.039259078014184, + "diff": -0.034012253689861716, + "se": 0.04008023738742155, + "t_stat": -0.8486040978523705, + "p_tukey": 0.6583051719726276 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 4.005246824324322, + "meanB": 4.3232485714285716, + "diff": -0.31800174710424933, + "se": 0.16563921672032392, + "t_stat": -1.9198457551340893, + "p_tukey": 0.13366339054589427 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 4.039259078014184, + "meanB": 4.3232485714285716, + "diff": -0.2839894934143876, + "se": 0.16890403646613525, + "t_stat": -1.6813659362801945, + "p_tukey": 0.21315764166409124 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "agegroup" + }, + { + "name": "y", + "value": "rightitginferiortemporalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 432.0, + "df_explained": 4.0, + "ss_residual": 776.327715339041, + "ss_explained": 47.927586731714555, + "ms_residual": 1.7970548966181505, + "ms_explained": 11.981896682928639, + "p_value": 3.255744225877907e-05, + "f_stat": 6.667518452261632, + "tukey_test": [ + { + "groupA": "+80y", 
+ "groupB": "-50y", + "meanA": 10.173459574468085, + "meanB": 10.936498765432097, + "diff": -0.7630391909640117, + "se": 0.24580698421231925, + "t_stat": -3.1042209537257324, + "p_tukey": 0.017284750472305155 + }, + { + "groupA": "+80y", + "groupB": "50-59y", + "meanA": 10.173459574468085, + "meanB": 11.579363636363636, + "diff": -1.4059040618955514, + "se": 0.3462939850026085, + "t_stat": -4.059857008157278, + "p_tukey": 0.001 + }, + { + "groupA": "+80y", + "groupB": "60-69y", + "meanA": 10.173459574468085, + "meanB": 11.251329914529913, + "diff": -1.0778703400618284, + "se": 0.23150519244108222, + "t_stat": -4.655922956614224, + "p_tukey": 0.001 + }, + { + "groupA": "+80y", + "groupB": "70-79y", + "meanA": 10.173459574468085, + "meanB": 10.909298823529408, + "diff": -0.735839249061323, + "se": 0.22092107577802755, + "t_stat": -3.3307788605948314, + "p_tukey": 0.008316122346362809 + }, + { + "groupA": "-50y", + "groupB": "50-59y", + "meanA": 10.936498765432097, + "meanB": 11.579363636363636, + "diff": -0.6428648709315397, + "se": 0.3222889641359985, + "t_stat": -1.9946847161054684, + "p_tukey": 0.27033880447157577 + }, + { + "groupA": "-50y", + "groupB": "60-69y", + "meanA": 10.936498765432097, + "meanB": 11.251329914529913, + "diff": -0.3148311490978166, + "se": 0.19376611283207038, + "t_stat": -1.624799839849544, + "p_tukey": 0.4828441459499154 + }, + { + "groupA": "-50y", + "groupB": "70-79y", + "meanA": 10.936498765432097, + "meanB": 10.909298823529408, + "diff": 0.027199941902688707, + "se": 0.18098832580525487, + "t_stat": 0.15028561528303266, + "p_tukey": 0.9 + }, + { + "groupA": "50-59y", + "groupB": "60-69y", + "meanA": 11.579363636363636, + "meanB": 11.251329914529913, + "diff": 0.32803372183372304, + "se": 0.31151846982720316, + "t_stat": 1.0530153220631855, + "p_tukey": 0.8076619279646386 + }, + { + "groupA": "50-59y", + "groupB": "70-79y", + "meanA": 11.579363636363636, + "meanB": 10.909298823529408, + "diff": 0.6700648128342284, + "se": 0.303735451732623, + "t_stat": 2.206080353847148, + "p_tukey": 0.17922095973830998 + }, + { + "groupA": "60-69y", + "groupB": "70-79y", + "meanA": 11.251329914529913, + "meanB": 10.909298823529408, + "diff": 0.3420310910005053, + "se": 0.16102904929675343, + "t_stat": 2.1240334740484688, + "p_tukey": 0.2117355530538847 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3851179_a" + }, + { + "name": "y", + "value": "rightofugoccipitalfusiformgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 185.00664039583233, + "ss_explained": 2.605106515710213, + "ms_residual": 0.2081064571381691, + "ms_explained": 1.3025532578551064, + "p_value": 0.001998389457223509, + "f_stat": 6.259071802804735, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 4.069912838874676, + "meanB": 4.157771882951655, + "diff": -0.08785904407697931, + "se": 0.032584882202882255, + "t_stat": -2.696313079481007, + "p_tukey": 0.019541153692732904 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 4.069912838874676, + "meanB": 4.006690462962966, + "diff": 0.06322237591171032, + "se": 0.049589849954670376, + "t_stat": 1.2749055697789227, + "p_tukey": 0.4114594877799299 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 4.157771882951655, + "meanB": 4.006690462962966, + "diff": 0.15108141998868962, + "se": 0.04956253232014539, + "t_stat": 
3.0482990459968984, + "p_tukey": 0.006694666304025709 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs744373_c" + }, + { + "name": "y", + "value": "rightaccumbensarea" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,edsd,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 835.0, + "df_explained": 2.0, + "ss_residual": 1.9017815634103508, + "ss_explained": 0.014214782061865187, + "ms_residual": 0.0022775827106710786, + "ms_explained": 0.0071073910309325935, + "p_value": 0.04464643986241397, + "f_stat": 3.12058525806004, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 0.37830423844282207, + "meanB": 0.3707878975903614, + "diff": 0.007516340852460668, + "se": 0.003521612224604779, + "t_stat": 2.134346535926228, + "p_tukey": 0.083706688508146 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 0.37830423844282207, + "meanB": 0.3681942, + "diff": 0.010110038442822045, + "se": 0.005432873882402533, + "t_stat": 1.860900632272945, + "p_tukey": 0.15095905377435948 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 0.3707878975903614, + "meanB": 0.3681942, + "diff": 0.002593697590361377, + "se": 0.005552904081355335, + "t_stat": 0.46708849142020753, + "p_tukey": 0.8765901674071936 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "gender" + }, + { + "name": "y", + "value": "rightttgtransversetemporalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1501.0, + "df_explained": 1.0, + "ss_residual": 51.80347034429436, + "ss_explained": 9.655162259949833, + "ms_residual": 0.03451263847054921, + "ms_explained": 9.655162259949833, + "p_value": 1.020730260183743e-57, + "f_stat": 279.7572914684256, + "tukey_test": [ + { + "groupA": "F", + "groupB": "M", + "meanA": 1.4097647461645744, + "meanB": 1.5702327417302817, + "diff": -0.16046799556570734, + "se": 0.009593955958749017, + "t_stat": -16.725946653879703, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3865444_t" + }, + { + "name": "y", + "value": "leftventraldc" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,edsd,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 185.03902577068055, + "ss_explained": 0.5749313061834264, + "ms_residual": 0.2081428861312492, + "ms_explained": 0.2874656530917132, + "p_value": 0.25184121150330135, + "f_stat": 1.3810976605294365, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 4.711005022321428, + "meanB": 4.6710851344086, + "diff": 0.03991988791282797, + "se": 0.032002008436855356, + "t_stat": 1.2474182047541094, + "p_tukey": 0.4274179250013759 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 4.711005022321428, + "meanB": 4.7544820833333326, + "diff": -0.04347706101190418, + "se": 0.05792648854395146, + "t_stat": -0.7505557838002931, + "p_tukey": 0.7143971551569286 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 4.6710851344086, + "meanB": 4.7544820833333326, + "diff": -0.08339694892473215, + "se": 0.0587400826177421, + "t_stat": -1.419762199986123, + "p_tukey": 0.3317063035141157 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "ppmicategory" + }, + { + "name": "y", + "value": "lefthippocampus" + }, + { + "name": 
"pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 710.0, + "df_explained": 3.0, + "ss_residual": 58.577194053819056, + "ss_explained": 2.149971727259616, + "ms_residual": 0.08250309021664656, + "ms_explained": 0.7166572424198719, + "p_value": 1.1507476911054115e-05, + "f_stat": 8.686429084510495, + "tukey_test": [ + { + "groupA": "GENPD", + "groupB": "HC", + "meanA": 3.083090123456791, + "meanB": 3.207463934426229, + "diff": -0.12437381096943811, + "se": 0.038332665285657166, + "t_stat": -3.244590743758555, + "p_tukey": 0.006742410888206707 + }, + { + "groupA": "GENPD", + "groupB": "PD", + "meanA": 3.083090123456791, + "meanB": 3.246466419753088, + "diff": -0.16337629629629724, + "se": 0.03496094968975498, + "t_stat": -4.673108074754998, + "p_tukey": 0.001 + }, + { + "groupA": "GENPD", + "groupB": "PRODROMA", + "meanA": 3.083090123456791, + "meanB": 3.127568888888889, + "diff": -0.044478765432097944, + "se": 0.05340373277361502, + "t_stat": -0.8328774623423589, + "p_tukey": 0.8177769796323241 + }, + { + "groupA": "HC", + "groupB": "PD", + "meanA": 3.207463934426229, + "meanB": 3.246466419753088, + "diff": -0.039002485326859127, + "se": 0.025584133607163117, + "t_stat": -1.524479426418376, + "p_tukey": 0.4247804450836846 + }, + { + "groupA": "HC", + "groupB": "PRODROMA", + "meanA": 3.207463934426229, + "meanB": 3.127568888888889, + "diff": 0.07989504553734017, + "se": 0.04779370840785718, + "t_stat": 1.671664497250136, + "p_tukey": 0.33976771305778986 + }, + { + "groupA": "PD", + "groupB": "PRODROMA", + "meanA": 3.246466419753088, + "meanB": 3.127568888888889, + "diff": 0.1188975308641993, + "se": 0.045134391972005465, + "t_stat": 2.6343000463581143, + "p_tukey": 0.0427209848806418 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs610932_a" + }, + { + "name": "y", + "value": "rightsmcsupplementarymotorcortex" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 886.0, + "df_explained": 2.0, + "ss_residual": 326.5069971837741, + "ss_explained": 0.46752313107381754, + "ms_residual": 0.3685180555121604, + "ms_explained": 0.23376156553690877, + "p_value": 0.5305320272780546, + "f_stat": 0.6343286632510604, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 5.287295508771931, + "meanB": 5.238108954248361, + "diff": 0.0491865545235699, + "se": 0.0457811903455023, + "t_stat": 1.0743834782880903, + "p_tukey": 0.5291323579986108 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 5.287295508771931, + "meanB": 5.238315034482756, + "diff": 0.0489804742891744, + "se": 0.06192374051955312, + "t_stat": 0.7909805492726697, + "p_tukey": 0.6912724906072754 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 5.238108954248361, + "meanB": 5.238315034482756, + "diff": -0.00020608023439550038, + "se": 0.05783057505986658, + "t_stat": -0.0035635169489870162, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "neurodegenerativescategories" + }, + { + "name": "y", + "value": "rightfofrontaloperculum" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,edsd,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1417.0, + "df_explained": 2.0, + "ss_residual": 63.19626257845239, + "ss_explained": 2.8336736937793607, 
+ "ms_residual": 0.04459863273002992, + "ms_explained": 1.4168368468896804, + "p_value": 3.187433557531707e-14, + "f_stat": 31.768616214453395, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 1.8865703030303034, + "meanB": 1.947437254601226, + "diff": -0.06086695157092259, + "se": 0.013829824949210088, + "t_stat": -4.4011368035716965, + "p_tukey": 0.001 + }, + { + "groupA": "AD", + "groupB": "PD", + "meanA": 1.8865703030303034, + "meanB": 2.008157777777777, + "diff": -0.12158747474747367, + "se": 0.015263725888689622, + "t_stat": -7.965779498016905, + "p_tukey": 0.001 + }, + { + "groupA": "MCI", + "groupB": "PD", + "meanA": 1.947437254601226, + "meanB": 2.008157777777777, + "diff": -0.06072052317655108, + "se": 0.013361245890653963, + "t_stat": -4.5445255385221515, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs190982_g" + }, + { + "name": "y", + "value": "rightpcggposteriorcingulategyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 533.0, + "df_explained": 2.0, + "ss_residual": 93.09072589193534, + "ss_explained": 0.15085200715101943, + "ms_residual": 0.17465426996610758, + "ms_explained": 0.07542600357550971, + "p_value": 0.6495279682115171, + "f_stat": 0.4318589152738519, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 4.01314256880734, + "meanB": 4.031888542510119, + "diff": -0.018745973702778862, + "se": 0.03883643500047875, + "t_stat": -0.4826903834645938, + "p_tukey": 0.867805081513163 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 4.01314256880734, + "meanB": 3.9808101408450702, + "diff": 0.03233242796226987, + "se": 0.05710591625469825, + "t_stat": 0.5661835074681917, + "p_tukey": 0.8200374628219905 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 4.031888542510119, + "meanB": 3.9808101408450702, + "diff": 0.051078401665048734, + "se": 0.05627629698951755, + "t_stat": 0.9076361522962854, + "p_tukey": 0.6246804106522338 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "apoe4" + }, + { + "name": "y", + "value": "minimentalstate" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1058.0, + "df_explained": 2.0, + "ss_residual": 7310.5116962417105, + "ss_explained": 453.18293146800806, + "ms_residual": 6.909746404765322, + "ms_explained": 226.59146573400403, + "p_value": 1.5211301089093965e-14, + "f_stat": 32.79302198091304, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 27.629151291512915, + "meanB": 26.529113924050634, + "diff": 1.1000373674622814, + "se": 0.1739012319557042, + "t_stat": 6.325644476989565, + "p_tukey": 0.001 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 27.629151291512915, + "meanB": 25.89516129032258, + "diff": 1.7339900011903353, + "se": 0.26167225862066584, + "t_stat": 6.626571767028695, + "p_tukey": 0.001 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 26.529113924050634, + "meanB": 25.89516129032258, + "diff": 0.6339526337280539, + "se": 0.27058601257562476, + "t_stat": 2.342887674398445, + "p_tukey": 0.050527480966974014 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3851179_a" + }, + { + "name": "y", + "value": "leftitginferiortemporalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": 
"dataset", + "value": "ppmi,edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 1335.1765545214294, + "ss_explained": 13.056086992154256, + "ms_residual": 1.5018858881005954, + "ms_explained": 6.528043496077128, + "p_value": 0.013227593283147555, + "f_stat": 4.346564241530368, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 10.46438294117647, + "meanB": 10.703375114503816, + "diff": -0.23899217332734501, + "se": 0.08753703770039661, + "t_stat": -2.730183469828134, + "p_tukey": 0.017721658990476397 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 10.46438294117647, + "meanB": 10.444158611111108, + "diff": 0.020224330065362395, + "se": 0.1332197102328336, + "t_stat": 0.15181184548454202, + "p_tukey": 0.9 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 10.703375114503816, + "meanB": 10.444158611111108, + "diff": 0.2592165033927074, + "se": 0.1331463232925829, + "t_stat": 1.946854385329823, + "p_tukey": 0.12628891219218907 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "edsdcategory" + }, + { + "name": "y", + "value": "rightptplanumtemporale" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 365.0, + "df_explained": 2.0, + "ss_residual": 21.212179236998047, + "ss_explained": 1.1256271490888985, + "ms_residual": 0.05811555955341931, + "ms_explained": 0.5628135745444492, + "p_value": 7.978472534858899e-05, + "f_stat": 9.684387087886781, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 1.7660042553191488, + "meanB": 1.879070860927152, + "diff": -0.11306660560800319, + "se": 0.02823186753213468, + "t_stat": -4.004928312989075, + "p_tukey": 0.001 + }, + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 1.7660042553191488, + "meanB": 1.881117105263158, + "diff": -0.11511284994400905, + "se": 0.034305181885655595, + "t_stat": -3.355552823701613, + "p_tukey": 0.0025177531630641292 + }, + { + "groupA": "CN", + "groupB": "MCI", + "meanA": 1.879070860927152, + "meanB": 1.881117105263158, + "diff": -0.0020462443360058646, + "se": 0.03390500951920273, + "t_stat": -0.060352271390660905, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs1476679_c" + }, + { + "name": "y", + "value": "leftputamen" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,edsd,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 532.0, + "df_explained": 2.0, + "ss_residual": 88.30939017314375, + "ss_explained": 0.05205030996726643, + "ms_residual": 0.1659950943104206, + "ms_explained": 0.026025154983633215, + "p_value": 0.8549293124257558, + "f_stat": 0.15678267536608428, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 3.8019472695035454, + "meanB": 3.822840970873786, + "diff": -0.020893701370240603, + "se": 0.0373421546633095, + "t_stat": -0.5595205086215791, + "p_tukey": 0.8238491798421284 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 3.8019472695035454, + "meanB": 3.8093727659574466, + "diff": -0.007425496453901115, + "se": 0.0641906971721772, + "t_stat": -0.11567870082457402, + "p_tukey": 0.9 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 3.822840970873786, + "meanB": 3.8093727659574466, + "diff": 0.013468204916339488, + "se": 0.0658605492380253, + "t_stat": 0.20449578802728044, + 
"p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "agegroup" + }, + { + "name": "y", + "value": "rightputamen" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 2212.0, + "df_explained": 4.0, + "ss_residual": 573.8040649926654, + "ss_explained": 53.55245514655646, + "ms_residual": 0.2594050926729952, + "ms_explained": 13.388113786639115, + "p_value": 1.3219537387970987e-41, + "f_stat": 51.610836351297486, + "tukey_test": [ + { + "groupA": "+80y", + "groupB": "-50y", + "meanA": 3.5624659515570953, + "meanB": 3.8842307975460133, + "diff": -0.32176484598891797, + "se": 0.049890257192307, + "t_stat": -6.449452540375631, + "p_tukey": 0.001 + }, + { + "groupA": "+80y", + "groupB": "50-59y", + "meanA": 3.5624659515570953, + "meanB": 4.0955162650602395, + "diff": -0.5330503135031441, + "se": 0.04403842637007171, + "t_stat": -12.104208924808503, + "p_tukey": 0.001 + }, + { + "groupA": "+80y", + "groupB": "60-69y", + "meanA": 3.5624659515570953, + "meanB": 3.911805206738132, + "diff": -0.3493392551810368, + "se": 0.03598397930803583, + "t_stat": -9.708188529972377, + "p_tukey": 0.001 + }, + { + "groupA": "+80y", + "groupB": "70-79y", + "meanA": 3.5624659515570953, + "meanB": 3.7143910312862123, + "diff": -0.15192507972911695, + "se": 0.034614747605325465, + "t_stat": -4.389027516865191, + "p_tukey": 0.001 + }, + { + "groupA": "-50y", + "groupB": "50-59y", + "meanA": 3.8842307975460133, + "meanB": 4.0955162650602395, + "diff": -0.21128546751422617, + "se": 0.05131500567195209, + "t_stat": -4.117420718316538, + "p_tukey": 0.001 + }, + { + "groupA": "-50y", + "groupB": "60-69y", + "meanA": 3.8842307975460133, + "meanB": 3.911805206738132, + "diff": -0.027574409192118843, + "se": 0.04459477073381361, + "t_stat": -0.6183327941455427, + "p_tukey": 0.9 + }, + { + "groupA": "-50y", + "groupB": "70-79y", + "meanA": 3.8842307975460133, + "meanB": 3.7143910312862123, + "diff": 0.16983976625980102, + "se": 0.043497443163255385, + "t_stat": 3.904591946297978, + "p_tukey": 0.001 + }, + { + "groupA": "50-59y", + "groupB": "60-69y", + "meanA": 4.0955162650602395, + "meanB": 3.911805206738132, + "diff": 0.18371105832210732, + "se": 0.03793466503396446, + "t_stat": 4.842828008567448, + "p_tukey": 0.001 + }, + { + "groupA": "50-59y", + "groupB": "70-79y", + "meanA": 4.0955162650602395, + "meanB": 3.7143910312862123, + "diff": 0.3811252337740272, + "se": 0.03663840602671355, + "t_stat": 10.402342107791034, + "p_tukey": 0.001 + }, + { + "groupA": "60-69y", + "groupB": "70-79y", + "meanA": 3.911805206738132, + "meanB": 3.7143910312862123, + "diff": 0.19741417545191986, + "se": 0.026416596409591528, + "t_stat": 7.473111690506855, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "dataset" + }, + { + "name": "y", + "value": "leftporgposteriororbitalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1501.0, + "df_explained": 1.0, + "ss_residual": 105.44075444880504, + "ss_explained": 0.08469466432330075, + "ms_residual": 0.07024700496256166, + "ms_explained": 0.08469466432330075, + "p_value": 0.27236758949165857, + "f_stat": 1.205669399975686, + "tukey_test": [ + { + "groupA": "adni", + "groupB": "edsd", + "meanA": 2.428203696060038, + "meanB": 2.411673112128146, + "diff": 
0.016530583931892107, + "se": 0.01505476829303654, + "t_stat": 1.098029781005543, + "p_tukey": 0.272365142398 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3764650_g" + }, + { + "name": "y", + "value": "leftppplanumpolare" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 50.93674020446667, + "ss_explained": 0.12453684534982648, + "ms_residual": 0.05729667064619423, + "ms_explained": 0.06226842267491324, + "p_value": 0.3377512123370682, + "f_stat": 1.0867720929095424, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 2.274898521617851, + "meanB": 2.2565345751633994, + "diff": 0.018363946454451607, + "se": 0.021316653536157887, + "t_stat": 0.8614835543159803, + "p_tukey": 0.6509351304386615 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 2.274898521617851, + "meanB": 2.333492727272727, + "diff": -0.05859420565487605, + "se": 0.051810286573449583, + "t_stat": -1.130937686897546, + "p_tukey": 0.4966211831210402 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 2.2565345751633994, + "meanB": 2.333492727272727, + "diff": -0.07695815210932766, + "se": 0.054579136707360386, + "t_stat": -1.4100287536968188, + "p_tukey": 0.33678573649882326 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "dataset" + }, + { + "name": "y", + "value": "leftaccumbensarea" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 2214.0, + "df_explained": 2.0, + "ss_residual": 6.848420119385585, + "ss_explained": 1.4506519357358785, + "ms_residual": 0.0030932340195960186, + "ms_explained": 0.7253259678679392, + "p_value": 4.29408982720317e-93, + "f_stat": 234.4879059498602, + "tukey_test": [ + { + "groupA": "adni", + "groupB": "edsd", + "meanA": 0.4106431716697937, + "meanB": 0.3908437757437071, + "diff": 0.019799395926086594, + "se": 0.003159123167316401, + "t_stat": 6.267370684032463, + "p_tukey": 0.001 + }, + { + "groupA": "adni", + "groupB": "ppmi", + "meanA": 0.4106431716697937, + "meanB": 0.457287661064426, + "diff": -0.04664448939463228, + "se": 0.002689606105047962, + "t_stat": -17.342498333524755, + "p_tukey": 0.001 + }, + { + "groupA": "edsd", + "groupB": "ppmi", + "meanA": 0.3908437757437071, + "meanB": 0.457287661064426, + "diff": -0.06644388532071888, + "se": 0.003377957851906698, + "t_stat": -19.669838474513362, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "parkinsonbroadcategory" + }, + { + "name": "y", + "value": "csfglobal" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 711.0, + "df_explained": 2.0, + "ss_residual": 134.98490278412316, + "ss_explained": 3.651270828726745, + "ms_residual": 0.18985218394391443, + "ms_explained": 1.8256354143633724, + "p_value": 7.573030146454664e-05, + "f_stat": 9.616088561313028, + "tukey_test": [ + { + "groupA": "CN", + "groupB": "Other", + "meanA": 1.2436684699453553, + "meanB": 1.4393899999999997, + "diff": -0.19572153005464443, + "se": 0.05044013275897541, + "t_stat": -3.880273888054297, + "p_tukey": 0.001 + }, + { + "groupA": "CN", + "groupB": "PD", + "meanA": 1.2436684699453553, + "meanB": 
1.3907303456790117, + "diff": -0.14706187573365637, + "se": 0.0388099787768777, + "t_stat": -3.789279983355034, + "p_tukey": 0.001 + }, + { + "groupA": "Other", + "groupB": "PD", + "meanA": 1.4393899999999997, + "meanB": 1.3907303456790117, + "diff": 0.048659654320988066, + "se": 0.04444698177814909, + "t_stat": 1.0947797212387995, + "p_tukey": 0.5175189999679977 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs2718058_g" + }, + { + "name": "y", + "value": "leftscasubcallosalarea" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 549.0, + "df_explained": 2.0, + "ss_residual": 10.6352445536079, + "ss_explained": 0.031173327860387626, + "ms_residual": 0.019372030152291254, + "ms_explained": 0.015586663930193813, + "p_value": 0.44779514494098926, + "f_stat": 0.8045963075455093, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 1.2270258810572683, + "meanB": 1.2288193320000005, + "diff": -0.0017934509427321998, + "se": 0.012760386376873548, + "t_stat": -0.14054832587064792, + "p_tukey": 0.9 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 1.2270258810572683, + "meanB": 1.2497624666666665, + "diff": -0.022736585609398174, + "se": 0.018537342723319822, + "t_stat": -1.2265288476754406, + "p_tukey": 0.4398680603988344 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 1.2288193320000005, + "meanB": 1.2497624666666665, + "diff": -0.020943134666665975, + "se": 0.018324351447542377, + "t_stat": -1.142912737000295, + "p_tukey": 0.48959573746688456 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs744373_c" + }, + { + "name": "y", + "value": "rightmtgmiddletemporalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 835.0, + "df_explained": 2.0, + "ss_residual": 2179.817887710152, + "ss_explained": 12.447324666611737, + "ms_residual": 2.610560344563056, + "ms_explained": 6.223662333305868, + "p_value": 0.09280523749776558, + "f_stat": 2.3840331238723222, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 13.578097347931884, + "meanB": 13.360407530120488, + "diff": 0.21768981781139551, + "se": 0.11922609571830836, + "t_stat": 1.8258571372304617, + "p_tukey": 0.16195533027747044 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 13.578097347931884, + "meanB": 13.269984736842108, + "diff": 0.3081126110897756, + "se": 0.18393289783673333, + "t_stat": 1.67513595834971, + "p_tukey": 0.21563366092470515 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 13.360407530120488, + "meanB": 13.269984736842108, + "diff": 0.0904227932783801, + "se": 0.187996585453856, + "t_stat": 0.48098104048051704, + "p_tukey": 0.8686423919727014 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3851179_a" + }, + { + "name": "y", + "value": "leftfrpfrontalpole" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 155.49843101513048, + "ss_explained": 1.3877373856555386, + "ms_residual": 0.1749138706581895, + "ms_explained": 0.6938686928277693, + "p_value": 0.01926780525861529, + "f_stat": 3.966916346981441, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 
3.51656780051151, + "meanB": 3.5666824173028018, + "diff": -0.05011461679129159, + "se": 0.029873458371417652, + "t_stat": -1.6775632793570459, + "p_tukey": 0.21464231445870452 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 3.51656780051151, + "meanB": 3.444561296296297, + "diff": 0.07200650421521315, + "se": 0.04546342408243095, + "t_stat": 1.583833722789032, + "p_tukey": 0.2534633923156808 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 3.5666824173028018, + "meanB": 3.444561296296297, + "diff": 0.12212112100650474, + "se": 0.04543837957827388, + "t_stat": 2.6876205124378227, + "p_tukey": 0.020034436534755784 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "adnicategory" + }, + { + "name": "y", + "value": "leftlorglateralorbitalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1063.0, + "df_explained": 2.0, + "ss_residual": 69.17402212805199, + "ss_explained": 1.0159740093675078, + "ms_residual": 0.06507433878462088, + "ms_explained": 0.5079870046837539, + "p_value": 0.0004309640114203372, + "f_stat": 7.80625687746223, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 2.2718554054054048, + "meanB": 2.341146380597016, + "diff": -0.06929097519161109, + "se": 0.023150428703169138, + "t_stat": -2.9930752505730327, + "p_tukey": 0.007950894589761393 + }, + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 2.2718554054054048, + "meanB": 2.350193784722224, + "diff": -0.0783383793168193, + "se": 0.020152020613208147, + "t_stat": -3.8873709401366106, + "p_tukey": 0.001 + }, + { + "groupA": "CN", + "groupB": "MCI", + "meanA": 2.341146380597016, + "meanB": 2.350193784722224, + "diff": -0.009047404125208214, + "se": 0.01886242242050924, + "t_stat": -0.479652290862223, + "p_tukey": 0.8693442943891938 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs11136000_t" + }, + { + "name": "y", + "value": "rightpinsposteriorinsula" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 888.0, + "df_explained": 2.0, + "ss_residual": 63.79634187028566, + "ss_explained": 0.26225396176642474, + "ms_residual": 0.07184272733140278, + "ms_explained": 0.13112698088321237, + "p_value": 0.16179039114750524, + "f_stat": 1.8251949188724101, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 2.34353280254777, + "meanB": 2.3065237028824854, + "diff": 0.03700909966528476, + "se": 0.019700127096441172, + "t_stat": 1.8786223806632423, + "p_tukey": 0.14556280758701845 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 2.34353280254777, + "meanB": 2.3127440476190477, + "diff": 0.03078875492872246, + "se": 0.028266214290775655, + "t_stat": 1.0892422526765464, + "p_tukey": 0.5206321778965668 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 2.3065237028824854, + "meanB": 2.3127440476190477, + "diff": -0.0062203447365623, + "se": 0.02700882982611415, + "t_stat": -0.23030782068714456, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "alzheimerbroadcategory" + }, + { + "name": "y", + "value": "rightamygdala" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 365.0, + "df_explained": 2.0, + "ss_residual": 
3.7579271611539156, + "ss_explained": 1.143732546863201, + "ms_residual": 0.010295690852476481, + "ms_explained": 0.5718662734316005, + "p_value": 8.718646796053873e-22, + "f_stat": 55.54423511988476, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 0.7321317021276594, + "meanB": 0.8573076821192049, + "diff": -0.1251759799915455, + "se": 0.011882864435358178, + "t_stat": -10.534158718421194, + "p_tukey": 0.001 + }, + { + "groupA": "AD", + "groupB": "Other", + "meanA": 0.7321317021276594, + "meanB": 0.7923373684210525, + "diff": -0.06020566629339308, + "se": 0.014439137804594519, + "t_stat": -4.169616434731698, + "p_tukey": 0.001 + }, + { + "groupA": "CN", + "groupB": "Other", + "meanA": 0.8573076821192049, + "meanB": 0.7923373684210525, + "diff": 0.06497031369815243, + "se": 0.014270704243622213, + "t_stat": 4.552705499953768, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "agegroup" + }, + { + "name": "y", + "value": "rightmfcmedialfrontalcortex" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 2212.0, + "df_explained": 4.0, + "ss_residual": 133.2506621105771, + "ss_explained": 3.9037089498830966, + "ms_residual": 0.060239901496644266, + "ms_explained": 0.9759272374707741, + "p_value": 4.3858914227620226e-13, + "f_stat": 16.20067784349115, + "tukey_test": [ + { + "groupA": "+80y", + "groupB": "-50y", + "meanA": 1.6960306851211082, + "meanB": 1.7702004294478524, + "diff": -0.07416974432674417, + "se": 0.024041890806766976, + "t_stat": -3.0850212623820714, + "p_tukey": 0.017577921767874316 + }, + { + "groupA": "+80y", + "groupB": "50-59y", + "meanA": 1.6960306851211082, + "meanB": 1.8614842168674697, + "diff": -0.16545353174636146, + "se": 0.021221919823142775, + "t_stat": -7.7963508073351715, + "p_tukey": 0.001 + }, + { + "groupA": "+80y", + "groupB": "60-69y", + "meanA": 1.6960306851211082, + "meanB": 1.791460949464013, + "diff": -0.0954302643429048, + "se": 0.01734051796891038, + "t_stat": -5.503311061065227, + "p_tukey": 0.001 + }, + { + "groupA": "+80y", + "groupB": "70-79y", + "meanA": 1.6960306851211082, + "meanB": 1.7667031170336032, + "diff": -0.07067243191249495, + "se": 0.016680691362708753, + "t_stat": -4.236780741024307, + "p_tukey": 0.001 + }, + { + "groupA": "-50y", + "groupB": "50-59y", + "meanA": 1.7702004294478524, + "meanB": 1.8614842168674697, + "diff": -0.09128378741961729, + "se": 0.02472847069836185, + "t_stat": -3.6914449151788604, + "p_tukey": 0.002127120721904574 + }, + { + "groupA": "-50y", + "groupB": "60-69y", + "meanA": 1.7702004294478524, + "meanB": 1.791460949464013, + "diff": -0.021260520016160633, + "se": 0.02149001967262813, + "t_stat": -0.9893206400010973, + "p_tukey": 0.8438497307288506 + }, + { + "groupA": "-50y", + "groupB": "70-79y", + "meanA": 1.7702004294478524, + "meanB": 1.7667031170336032, + "diff": 0.0034973124142492207, + "se": 0.020961222446169176, + "t_stat": 0.16684677733995334, + "p_tukey": 0.9 + }, + { + "groupA": "50-59y", + "groupB": "60-69y", + "meanA": 1.8614842168674697, + "meanB": 1.791460949464013, + "diff": 0.07002326740345666, + "se": 0.01828054465669275, + "t_stat": 3.8304803668866727, + "p_tukey": 0.0012389620551023217 + }, + { + "groupA": "50-59y", + "groupB": "70-79y", + "meanA": 1.8614842168674697, + "meanB": 1.7667031170336032, + "diff": 0.09478109983386651, + "se": 0.017655883264599947, + "t_stat": 5.368244590963209, + 
"p_tukey": 0.001 + }, + { + "groupA": "60-69y", + "groupB": "70-79y", + "meanA": 1.791460949464013, + "meanB": 1.7667031170336032, + "diff": 0.024757832430409854, + "se": 0.012730039132044487, + "t_stat": 1.9448355322088993, + "p_tukey": 0.29401687922474373 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "apoe4" + }, + { + "name": "y", + "value": "rightsmcsupplementarymotorcortex" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1058.0, + "df_explained": 2.0, + "ss_residual": 414.20642406874805, + "ss_explained": 0.9191185873578713, + "ms_residual": 0.3914994556415388, + "ms_explained": 0.45955929367893567, + "p_value": 0.3095785049043742, + "f_stat": 1.1738440170392297, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 5.258685645756456, + "meanB": 5.249790303797466, + "diff": 0.008895341958989889, + "se": 0.041393980012550985, + "t_stat": 0.21489458023347235, + "p_tukey": 0.9 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 5.258685645756456, + "meanB": 5.3456444354838695, + "diff": -0.08695878972741333, + "se": 0.062286253647368825, + "t_stat": -1.3961152683821232, + "p_tukey": 0.3440540592765279 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 5.249790303797466, + "meanB": 5.3456444354838695, + "diff": -0.09585413168640322, + "se": 0.06440800832902832, + "t_stat": -1.4882331277305203, + "p_tukey": 0.29748726710591133 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs610932_a" + }, + { + "name": "y", + "value": "leftmfgmiddlefrontalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 886.0, + "df_explained": 2.0, + "ss_residual": 3585.3631206887026, + "ss_explained": 1.8089939182038055, + "ms_residual": 4.046685237797632, + "ms_explained": 0.9044969591019028, + "p_value": 0.7997475588267433, + "f_stat": 0.22351552096356428, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 18.313838596491248, + "meanB": 18.219346840958615, + "diff": 0.09449175553263345, + "se": 0.1517076243266629, + "t_stat": 0.6228543618161868, + "p_tukey": 0.7874589334944082 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 18.313838596491248, + "meanB": 18.29993103448277, + "diff": 0.01390756200847676, + "se": 0.20520007218565142, + "t_stat": 0.06777561947392552, + "p_tukey": 0.9 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 18.219346840958615, + "meanB": 18.29993103448277, + "diff": -0.08058419352415669, + "se": 0.19163632683131063, + "t_stat": -0.42050583444490436, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "apoe4" + }, + { + "name": "y", + "value": "rightputamen" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1058.0, + "df_explained": 2.0, + "ss_residual": 189.76868253683003, + "ss_explained": 0.2619479884940274, + "ms_residual": 0.17936548443934786, + "ms_explained": 0.1309739942470137, + "p_value": 0.48205173516995437, + "f_stat": 0.7302073453898112, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 3.7021967343173414, + "meanB": 3.671703316455694, + "diff": 0.030493417861647387, + "se": 0.0280182523034636, + "t_stat": 1.0883411831464525, + "p_tukey": 
0.5211135126486959 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 3.7021967343173414, + "meanB": 3.668082903225806, + "diff": 0.03411383109153521, + "se": 0.04215955965578487, + "t_stat": 0.8091600427058616, + "p_tukey": 0.6808335329670228 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 3.671703316455694, + "meanB": 3.668082903225806, + "diff": 0.00362041322988782, + "se": 0.043595707085405444, + "t_stat": 0.08304517742527515, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs1476679_c" + }, + { + "name": "y", + "value": "leftmfgmiddlefrontalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 532.0, + "df_explained": 2.0, + "ss_residual": 2090.997912228101, + "ss_explained": 0.22475756322587237, + "ms_residual": 3.9304472034362803, + "ms_explained": 0.11237878161293618, + "p_value": 0.9718145161272724, + "f_stat": 0.028591856294287975, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 18.434181205673745, + "meanB": 18.473834951456322, + "diff": -0.03965374578257652, + "se": 0.1817074484822628, + "t_stat": -0.2182285102442968, + "p_tukey": 0.9 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 18.434181205673745, + "meanB": 18.480525531914893, + "diff": -0.04634432624114737, + "se": 0.31235283300120603, + "t_stat": -0.1483717173167705, + "p_tukey": 0.9 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 18.473834951456322, + "meanB": 18.480525531914893, + "diff": -0.0066905804585708495, + "se": 0.32047835658076057, + "t_stat": -0.020876855866192708, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "ppmicategory" + }, + { + "name": "y", + "value": "rightputamen" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 710.0, + "df_explained": 3.0, + "ss_residual": 139.5298697331585, + "ss_explained": 1.696117491491643, + "ms_residual": 0.19652094328613873, + "ms_explained": 0.565372497163881, + "p_value": 0.03535354285003904, + "f_stat": 2.8769071006375464, + "tukey_test": [ + { + "groupA": "GENPD", + "groupB": "HC", + "meanA": 4.112617283950618, + "meanB": 4.185518032786887, + "diff": -0.07290074883626918, + "se": 0.059161384542654905, + "t_stat": -1.2322353406673283, + "p_tukey": 0.5937764338781897 + }, + { + "groupA": "GENPD", + "groupB": "PD", + "meanA": 4.112617283950618, + "meanB": 4.2410780246913555, + "diff": -0.12846074074073766, + "se": 0.0539575887342723, + "t_stat": -2.380772450254863, + "p_tukey": 0.08184795402018363 + }, + { + "groupA": "GENPD", + "groupB": "PRODROMA", + "meanA": 4.112617283950618, + "meanB": 4.109671111111112, + "diff": 0.002946172839505934, + "se": 0.08242157823069977, + "t_stat": 0.03574516410325865, + "p_tukey": 0.9 + }, + { + "groupA": "HC", + "groupB": "PD", + "meanA": 4.185518032786887, + "meanB": 4.2410780246913555, + "diff": -0.055559991904468475, + "se": 0.03948571682257574, + "t_stat": -1.4070908767876884, + "p_tukey": 0.4954116136288891 + }, + { + "groupA": "HC", + "groupB": "PRODROMA", + "meanA": 4.185518032786887, + "meanB": 4.109671111111112, + "diff": 0.07584692167577511, + "se": 0.07376324971837353, + "t_stat": 1.0282481041081704, + "p_tukey": 0.7081918113324484 + }, + { + "groupA": "PD", + "groupB": "PRODROMA", + "meanA": 4.2410780246913555, + "meanB": 
4.109671111111112, + "diff": 0.1314069135802436, + "se": 0.06965894752311519, + "t_stat": 1.8864326587282751, + "p_tukey": 0.23488586422494562 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "agegroup" + }, + { + "name": "y", + "value": "pib" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 9.0, + "df_explained": 3.0, + "ss_residual": 1.1017140625, + "ss_explained": 0.6653859374999989, + "ms_residual": 0.1224126736111111, + "ms_explained": 0.22179531249999962, + "p_value": 0.21507872926736787, + "f_stat": 1.8118656014704329, + "tukey_test": [ + { + "groupA": "+80y", + "groupB": "50-59y", + "meanA": 1.245, + "meanB": 2.22, + "diff": -0.9750000000000001, + "se": 0.42850788839491233, + "t_stat": -2.275337342451538, + "p_tukey": 0.17516855688863342 + }, + { + "groupA": "+80y", + "groupB": "60-69y", + "meanA": 1.245, + "meanB": 1.699375, + "diff": -0.454375, + "se": 0.3030008336759708, + "t_stat": -1.4995833327835288, + "p_tukey": 0.47685541934561315 + }, + { + "groupA": "+80y", + "groupB": "70-79y", + "meanA": 1.245, + "meanB": 1.6137500000000002, + "diff": -0.36875000000000013, + "se": 0.2856719255966082, + "t_stat": -1.2908163769677, + "p_tukey": 0.5850821685690389 + }, + { + "groupA": "50-59y", + "groupB": "60-69y", + "meanA": 2.22, + "meanB": 1.699375, + "diff": 0.5206250000000001, + "se": 0.39117239423799943, + "t_stat": 1.3309349219649644, + "p_tukey": 0.5642133559558826 + }, + { + "groupA": "50-59y", + "groupB": "70-79y", + "meanA": 2.22, + "meanB": 1.6137500000000002, + "diff": 0.60625, + "se": 0.37790843584078615, + "t_stat": 1.6042245753291804, + "p_tukey": 0.4241408090831983 + }, + { + "groupA": "60-69y", + "groupB": "70-79y", + "meanA": 1.699375, + "meanB": 1.6137500000000002, + "diff": 0.08562499999999984, + "se": 0.22584348711285937, + "t_stat": 0.3791342451120186, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs610932_a" + }, + { + "name": "y", + "value": "rightporgposteriororbitalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 886.0, + "df_explained": 2.0, + "ss_residual": 48.027395247451516, + "ss_explained": 0.04184313040179249, + "ms_residual": 0.05420699237861345, + "ms_explained": 0.020921565200896244, + "p_value": 0.6799140311952919, + "f_stat": 0.3859569454576589, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 2.279908070175438, + "meanB": 2.2721690849673184, + "diff": 0.007738985208119775, + "se": 0.017558416667503112, + "t_stat": 0.4407564391863982, + "p_tukey": 0.8916412360373324 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 2.279908070175438, + "meanB": 2.2590701379310345, + "diff": 0.020837932244403667, + "se": 0.023749553680171575, + "t_stat": 0.8774031093393215, + "p_tukey": 0.6418277458232513 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 2.2721690849673184, + "meanB": 2.2590701379310345, + "diff": 0.013098947036283892, + "se": 0.022179705799681308, + "t_stat": 0.5905825421936889, + "p_tukey": 0.8059230335917602 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3851179_a" + }, + { + "name": "y", + "value": "rightaccumbensarea" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni" + }, + { + "name": "filter", + "value": "" + 
} + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 2.0171938783454593, + "ss_explained": 0.005763607661018987, + "ms_residual": 0.0022690594807035537, + "ms_explained": 0.0028818038305094937, + "p_value": 0.28132847088712143, + "f_stat": 1.2700433175140697, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 0.3725266598465468, + "meanB": 0.37703962340966907, + "diff": -0.004512963563122252, + "se": 0.003402485771364075, + "t_stat": -1.3263725012766123, + "p_tukey": 0.3821909798495686 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 0.3725266598465468, + "meanB": 0.3704686203703705, + "diff": 0.002058039476176332, + "se": 0.005178130085734072, + "t_stat": 0.3974483920066632, + "p_tukey": 0.9 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 0.37703962340966907, + "meanB": 0.3704686203703705, + "diff": 0.006571003039298584, + "se": 0.00517527760149921, + "t_stat": 1.2696909316313876, + "p_tukey": 0.41446977658637374 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "edsdcategory" + }, + { + "name": "y", + "value": "rightptplanumtemporale" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 365.0, + "df_explained": 2.0, + "ss_residual": 21.212179236998047, + "ss_explained": 1.1256271490888985, + "ms_residual": 0.05811555955341931, + "ms_explained": 0.5628135745444492, + "p_value": 7.978472534858899e-05, + "f_stat": 9.684387087886781, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 1.7660042553191488, + "meanB": 1.879070860927152, + "diff": -0.11306660560800319, + "se": 0.02823186753213468, + "t_stat": -4.004928312989075, + "p_tukey": 0.001 + }, + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 1.7660042553191488, + "meanB": 1.881117105263158, + "diff": -0.11511284994400905, + "se": 0.034305181885655595, + "t_stat": -3.355552823701613, + "p_tukey": 0.0025177531630641292 + }, + { + "groupA": "CN", + "groupB": "MCI", + "meanA": 1.879070860927152, + "meanB": 1.881117105263158, + "diff": -0.0020462443360058646, + "se": 0.03390500951920273, + "t_stat": -0.060352271390660905, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "parkinsonbroadcategory" + }, + { + "name": "y", + "value": "leftpinsposteriorinsula" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 711.0, + "df_explained": 2.0, + "ss_residual": 43.48042263178986, + "ss_explained": 1.2493876836721471, + "ms_residual": 0.06115389962277054, + "ms_explained": 0.6246938418360736, + "p_value": 4.228408767904569e-05, + "f_stat": 10.215110494825582, + "tukey_test": [ + { + "groupA": "CN", + "groupB": "Other", + "meanA": 2.310218032786887, + "meanB": 2.310941269841269, + "diff": -0.0007232370543821531, + "se": 0.028627307323306813, + "t_stat": -0.02526388689701642, + "p_tukey": 0.9 + }, + { + "groupA": "CN", + "groupB": "PD", + "meanA": 2.310218032786887, + "meanB": 2.394940493827161, + "diff": -0.08472246104027414, + "se": 0.02202661113057826, + "t_stat": -3.8463684012952353, + "p_tukey": 0.001 + }, + { + "groupA": "Other", + "groupB": "PD", + "meanA": 2.310941269841269, + "meanB": 2.394940493827161, + "diff": -0.08399922398589199, + "se": 0.02522589329882521, + "t_stat": -3.329881046860842, + "p_tukey": 0.002628046703027076 + } + ] + } + }, + { 
+ "input": [ + { + "name": "x", + "value": "rs3865444_t" + }, + { + "name": "y", + "value": "rightsmgsupramarginalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 642.5409745563304, + "ss_explained": 4.771273778371824, + "ms_residual": 0.7227682503445786, + "ms_explained": 2.385636889185912, + "p_value": 0.03730978910495671, + "f_stat": 3.30069408561951, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 7.879509821428566, + "meanB": 7.77043857526881, + "diff": 0.10907124615975583, + "se": 0.05963428033881508, + "t_stat": 1.8290024720691223, + "p_tukey": 0.1609059327361787 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 7.879509821428566, + "meanB": 8.018593333333332, + "diff": -0.13908351190476598, + "se": 0.10794336435755918, + "t_stat": -1.2884859827423574, + "p_tukey": 0.4036521558961196 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 7.77043857526881, + "meanB": 8.018593333333332, + "diff": -0.2481547580645218, + "se": 0.1094594597355778, + "t_stat": -2.2670928457347723, + "p_tukey": 0.0610781723986179 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "adnicategory" + }, + { + "name": "y", + "value": "righttrifgtriangularpartoftheinferiorfrontalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1063.0, + "df_explained": 2.0, + "ss_residual": 169.3266614990567, + "ss_explained": 2.9361404779971156, + "ms_residual": 0.15929130903015684, + "ms_explained": 1.4680702389985578, + "p_value": 0.0001075809262866779, + "f_stat": 9.216260748542311, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 3.384357747747748, + "meanB": 3.4789699626865676, + "diff": -0.09461221493881977, + "se": 0.03622014598448542, + "t_stat": -2.612143390569052, + "p_tukey": 0.024733549713938707 + }, + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 3.384357747747748, + "meanB": 3.5196780381944444, + "diff": -0.13532029044669658, + "se": 0.03152896811767631, + "t_stat": -4.291935274939461, + "p_tukey": 0.001 + }, + { + "groupA": "CN", + "groupB": "MCI", + "meanA": 3.4789699626865676, + "meanB": 3.5196780381944444, + "diff": -0.040708075507876806, + "se": 0.029511319312991814, + "t_stat": -1.3794054774757503, + "p_tukey": 0.3529886613891525 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs610932_a" + }, + { + "name": "y", + "value": "leftmogmiddleoccipitalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 886.0, + "df_explained": 2.0, + "ss_residual": 489.5501199010008, + "ss_explained": 0.47141876975502006, + "ms_residual": 0.5525396387144479, + "ms_explained": 0.23570938487751003, + "p_value": 0.652863340771847, + "f_stat": 0.4265927154582379, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 6.020418736842107, + "meanB": 6.01177459694989, + "diff": 0.008644139892216351, + "se": 0.0560582343371192, + "t_stat": 0.1541992892646745, + "p_tukey": 0.9 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 6.020418736842107, + "meanB": 5.953632413793104, + "diff": 0.06678632304900312, + "se": 0.07582449322262125, + "t_stat": 0.8808014430497808, + 
"p_tukey": 0.6398818015314789 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 6.01177459694989, + "meanB": 5.953632413793104, + "diff": 0.058142183156786764, + "se": 0.07081248661492821, + "t_stat": 0.8210724680939198, + "p_tukey": 0.6740553271585464 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "dataset" + }, + { + "name": "y", + "value": "rightangangulargyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1149.0, + "df_explained": 1.0, + "ss_residual": 1670.1990163487844, + "ss_explained": 188.14032151910305, + "ms_residual": 1.4536109802861483, + "ms_explained": 188.14032151910305, + "p_value": 1.7129312813484601e-28, + "f_stat": 129.42962324216, + "tukey_test": [ + { + "groupA": "edsd", + "groupB": "ppmi", + "meanA": 9.72729221967964, + "meanB": 10.560376190476191, + "diff": -0.8330839707965509, + "se": 0.07322711680979481, + "t_stat": -11.376714079300173, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "apoe4" + }, + { + "name": "y", + "value": "leftmfcmedialfrontalcortex" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1058.0, + "df_explained": 2.0, + "ss_residual": 56.433565987083696, + "ss_explained": 0.05932065605193289, + "ms_residual": 0.05333985443013582, + "ms_explained": 0.029660328025966444, + "p_value": 0.573629759224322, + "f_stat": 0.5560631603300557, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 1.7777124372693718, + "meanB": 1.7619604556962012, + "diff": 0.015751981573170593, + "se": 0.015279090997372805, + "t_stat": 1.0309501773292076, + "p_tukey": 0.5539456906017959 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 1.7777124372693718, + "meanB": 1.7661733064516139, + "diff": 0.011539130817757925, + "se": 0.022990718386466727, + "t_stat": 0.5019038824184949, + "p_tukey": 0.8566153746234767 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 1.7619604556962012, + "meanB": 1.7661733064516139, + "diff": -0.004212850755412667, + "se": 0.023773887408758073, + "t_stat": -0.1772049594994167, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3865444_t" + }, + { + "name": "y", + "value": "rightmfgmiddlefrontalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 3523.586615285245, + "ss_explained": 8.516363014666586, + "ms_residual": 3.963539499758431, + "ms_explained": 4.258181507333293, + "p_value": 0.34196671342167134, + "f_stat": 1.0743381029993064, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 17.929846875, + "meanB": 17.94061747311827, + "diff": -0.010770598118270414, + "se": 0.13964906894787657, + "t_stat": -0.07712617204981506, + "p_tukey": 0.9 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 17.929846875, + "meanB": 18.29294027777778, + "diff": -0.3630934027777819, + "se": 0.25277726579393256, + "t_stat": -1.4364163709001447, + "p_tukey": 0.32314575810100066 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 17.94061747311827, + "meanB": 18.29294027777778, + "diff": -0.3523228046595115, + "se": 0.2563275946781511, + "t_stat": -1.3745020511813935, + 
"p_tukey": 0.35570083098182514 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "neurodegenerativescategories" + }, + { + "name": "y", + "value": "rightstgsuperiortemporalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1013.0, + "df_explained": 1.0, + "ss_residual": 641.1043464792691, + "ss_explained": 53.45872748890283, + "ms_residual": 0.6328769461789429, + "ms_explained": 53.45872748890283, + "p_value": 2.1543314935555968e-19, + "f_stat": 84.46938668198484, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 6.717761928374657, + "meanB": 7.196574141104288, + "diff": -0.4788122127296317, + "se": 0.05209733989851228, + "t_stat": -9.190722859600456, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs190982_g" + }, + { + "name": "y", + "value": "leftpcuprecuneus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 533.0, + "df_explained": 2.0, + "ss_residual": 672.3172200241156, + "ss_explained": 1.844937156758212, + "ms_residual": 1.2613831520152263, + "ms_explained": 0.922468578379106, + "p_value": 0.4817579162307781, + "f_stat": 0.7313151257057307, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 10.78210311926606, + "meanB": 10.868417449392714, + "diff": -0.08631433012665468, + "se": 0.10436941902877832, + "t_stat": -0.827007862359134, + "p_tukey": 0.6708122006021553 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 10.78210311926606, + "meanB": 10.701672535211268, + "diff": 0.08043058405479186, + "se": 0.1534670034089239, + "t_stat": 0.5240904055478217, + "p_tukey": 0.8441193362135668 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 10.868417449392714, + "meanB": 10.701672535211268, + "diff": 0.16674491418144655, + "se": 0.15123747640107874, + "t_stat": 1.1025370043813902, + "p_tukey": 0.5131733682798588 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "gender" + }, + { + "name": "y", + "value": "rightcaudate" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,edsd,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 2215.0, + "df_explained": 1.0, + "ss_residual": 490.3447341676296, + "ss_explained": 30.989968109257106, + "ms_residual": 0.22137459781834293, + "ms_explained": 30.989968109257106, + "p_value": 2.3060136826808515e-31, + "f_stat": 139.9888172114809, + "tukey_test": [ + { + "groupA": "F", + "groupB": "M", + "meanA": 2.965897338129496, + "meanB": 3.2041439871382638, + "diff": -0.2382466490087678, + "se": 0.020136321139591993, + "t_stat": -11.831686997697297, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "agegroup" + }, + { + "name": "y", + "value": "rightcocentraloperculum" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 432.0, + "df_explained": 4.0, + "ss_residual": 136.1069135380543, + "ss_explained": 5.274439794243713, + "ms_residual": 0.31506229985660716, + "ms_explained": 1.3186099485609282, + "p_value": 0.0024573153279870037, + "f_stat": 4.1852355840767395, + "tukey_test": [ + { + "groupA": "+80y", + "groupB": "-50y", + 
"meanA": 3.711329787234042, + "meanB": 3.652278148148149, + "diff": 0.05905163908589284, + "se": 0.10292284105755888, + "t_stat": 0.5737466871213613, + "p_tukey": 0.9 + }, + { + "groupA": "+80y", + "groupB": "50-59y", + "meanA": 3.711329787234042, + "meanB": 4.098445454545455, + "diff": -0.38711566731141334, + "se": 0.14499816143070307, + "t_stat": -2.669797075298931, + "p_tukey": 0.060274589474212714 + }, + { + "groupA": "+80y", + "groupB": "60-69y", + "meanA": 3.711329787234042, + "meanB": 3.8498068376068377, + "diff": -0.13847705037279567, + "se": 0.09693447971776112, + "t_stat": -1.4285634046418965, + "p_tukey": 0.5950525323213174 + }, + { + "groupA": "+80y", + "groupB": "70-79y", + "meanA": 3.711329787234042, + "meanB": 3.8811223529411745, + "diff": -0.16979256570713241, + "se": 0.09250276122718601, + "t_stat": -1.8355405120299413, + "p_tukey": 0.3549377083773039 + }, + { + "groupA": "-50y", + "groupB": "50-59y", + "meanA": 3.652278148148149, + "meanB": 4.098445454545455, + "diff": -0.4461673063973062, + "se": 0.13494692161278396, + "t_stat": -3.3062429365935184, + "p_tukey": 0.009023814593816182 + }, + { + "groupA": "-50y", + "groupB": "60-69y", + "meanA": 3.652278148148149, + "meanB": 3.8498068376068377, + "diff": -0.1975286894586885, + "se": 0.08113259636321069, + "t_stat": -2.4346403087410287, + "p_tukey": 0.10821022018016346 + }, + { + "groupA": "-50y", + "groupB": "70-79y", + "meanA": 3.652278148148149, + "meanB": 3.8811223529411745, + "diff": -0.22884420479302525, + "se": 0.07578235724188323, + "t_stat": -3.0197556940937718, + "p_tukey": 0.022389783740721536 + }, + { + "groupA": "50-59y", + "groupB": "60-69y", + "meanA": 4.098445454545455, + "meanB": 3.8498068376068377, + "diff": 0.24863861693861766, + "se": 0.13043716418092036, + "t_stat": 1.9061945918553416, + "p_tukey": 0.31552881539193134 + }, + { + "groupA": "50-59y", + "groupB": "70-79y", + "meanA": 4.098445454545455, + "meanB": 3.8811223529411745, + "diff": 0.21732310160428092, + "se": 0.12717830505263514, + "t_stat": 1.7088063999149672, + "p_tukey": 0.4312586641190792 + }, + { + "groupA": "60-69y", + "groupB": "70-79y", + "meanA": 3.8498068376068377, + "meanB": 3.8811223529411745, + "diff": -0.03131551533433674, + "se": 0.06742512748174768, + "t_stat": -0.46444873749499405, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "gender" + }, + { + "name": "y", + "value": "leftacgganteriorcingulategyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 712.0, + "df_explained": 1.0, + "ss_residual": 213.91927543094758, + "ss_explained": 18.329505568828317, + "ms_residual": 0.30044842054908366, + "ms_explained": 18.329505568828317, + "p_value": 2.0401577919569557e-14, + "f_stat": 61.007162345304664, + "tukey_test": [ + { + "groupA": "F", + "groupB": "M", + "meanA": 4.473263671874998, + "meanB": 4.807360043668119, + "diff": -0.33409637179312135, + "se": 0.042774145941202595, + "t_stat": -7.810708184620933, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs190982_g" + }, + { + "name": "y", + "value": "rightttgtransversetemporalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 533.0, + "df_explained": 2.0, + "ss_residual": 18.491593992776163, + "ss_explained": 0.04709899011259658, + 
"ms_residual": 0.03469342212528361, + "ms_explained": 0.02354949505629829, + "p_value": 0.5076690676812438, + "f_stat": 0.6787884738281862, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 1.50879495412844, + "meanB": 1.5228651619433216, + "diff": -0.014070207814881552, + "se": 0.017309053615872885, + "t_stat": -0.8128814045603729, + "p_tukey": 0.6788917807712898 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 1.50879495412844, + "meanB": 1.4964578873239438, + "diff": 0.0123370668044962, + "se": 0.025451598897373924, + "t_stat": 0.4847265923937348, + "p_tukey": 0.8666383801621251 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 1.5228651619433216, + "meanB": 1.4964578873239438, + "diff": 0.026407274619377752, + "se": 0.025081844970639998, + "t_stat": 1.0528441847196353, + "p_tukey": 0.5416050168721471 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "dataset" + }, + { + "name": "y", + "value": "rightententorhinalarea" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1149.0, + "df_explained": 1.0, + "ss_residual": 47.110801741795385, + "ss_explained": 8.204662516276187, + "ms_residual": 0.04100156809555734, + "ms_explained": 8.204662516276187, + "p_value": 5.3249193174432493e-42, + "f_stat": 200.10606660590597, + "tukey_test": [ + { + "groupA": "edsd", + "groupB": "ppmi", + "meanA": 1.5623321052631578, + "meanB": 1.7363036414565813, + "diff": -0.17397153619342354, + "se": 0.012298384610052502, + "t_stat": -14.145885147486931, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3764650_g" + }, + { + "name": "y", + "value": "rightorifgorbitalpartoftheinferiorfrontalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 29.637164030802932, + "ss_explained": 0.009226217782164658, + "ms_residual": 0.03333764232936213, + "ms_explained": 0.004613108891082329, + "p_value": 0.8707905124335477, + "f_stat": 0.13837537896371674, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 1.5281775174337529, + "meanB": 1.5203609150326796, + "diff": 0.007816602401073292, + "se": 0.01626004617233225, + "t_stat": 0.48072448984639765, + "p_tukey": 0.8687727631712583 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 1.5281775174337529, + "meanB": 1.5352249999999996, + "diff": -0.0070474825662467655, + "se": 0.03952016438495331, + "t_stat": -0.17832624625746712, + "p_tukey": 0.9 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 1.5203609150326796, + "meanB": 1.5352249999999996, + "diff": -0.014864084967320057, + "se": 0.041632204670511784, + "t_stat": -0.3570333371715078, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs1476679_c" + }, + { + "name": "y", + "value": "leftsplsuperiorparietallobule" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 532.0, + "df_explained": 2.0, + "ss_residual": 720.4543117291412, + "ss_explained": 1.337482412242933, + "ms_residual": 1.3542374280622955, + "ms_explained": 0.6687412061214665, + "p_value": 0.6105738332729727, + "f_stat": 0.49381385587483856, + "tukey_test": [ + { + "groupA": "0", 
+ "groupB": "1", + "meanA": 10.659314574468095, + "meanB": 10.725492864077667, + "diff": -0.06617828960957262, + "se": 0.10665940408885384, + "t_stat": -0.6204637103957756, + "p_tukey": 0.7889821069207559 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 10.659314574468095, + "meanB": 10.549275957446808, + "diff": 0.11003861702128681, + "se": 0.18334618262292074, + "t_stat": 0.6001685742625901, + "p_tukey": 0.800592865307542 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 10.725492864077667, + "meanB": 10.549275957446808, + "diff": 0.17621690663085943, + "se": 0.18811573670638923, + "t_stat": 0.9367472903444464, + "p_tukey": 0.6080282405443889 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3851179_a" + }, + { + "name": "y", + "value": "leftprgprecentralgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 1818.524669557273, + "ss_explained": 3.4198160915342832, + "ms_residual": 2.0455845551825345, + "ms_explained": 1.7099080457671416, + "p_value": 0.4338237544352249, + "f_stat": 0.8359019144112382, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 12.43031386189259, + "meanB": 12.514633816793884, + "diff": -0.08431995490129474, + "se": 0.10216028491008364, + "t_stat": -0.8253692222521593, + "p_tukey": 0.671596891684586 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 12.43031386189259, + "meanB": 12.326476851851844, + "diff": 0.10383701004074553, + "se": 0.15547434446668973, + "t_stat": 0.6678723129332282, + "p_tukey": 0.7617037292465794 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 12.514633816793884, + "meanB": 12.326476851851844, + "diff": 0.18815696494204026, + "se": 0.155388698082923, + "t_stat": 1.2108793449162598, + "p_tukey": 0.4489134614590852 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "agegroup" + }, + { + "name": "y", + "value": "leftmfgmiddlefrontalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1498.0, + "df_explained": 4.0, + "ss_residual": 7294.520974943634, + "ss_explained": 273.6759871925244, + "ms_residual": 4.869506658840877, + "ms_explained": 68.4189967981311, + "p_value": 2.935830287432532e-11, + "f_stat": 14.050498662716144, + "tukey_test": [ + { + "groupA": "+80y", + "groupB": "-50y", + "meanA": 17.59551074074074, + "meanB": 16.960304938271612, + "diff": 0.6352058024691267, + "se": 0.27955782229458037, + "t_stat": 2.272180392791109, + "p_tukey": 0.154540054679428 + }, + { + "groupA": "+80y", + "groupB": "50-59y", + "meanA": 17.59551074074074, + "meanB": 18.874507575757576, + "diff": -1.2789968350168373, + "se": 0.3030109136143289, + "t_stat": -4.220959633964667, + "p_tukey": 0.001 + }, + { + "groupA": "+80y", + "groupB": "60-69y", + "meanA": 17.59551074074074, + "meanB": 18.41614865591398, + "diff": -0.8206379151732399, + "se": 0.1764235854728884, + "t_stat": -4.651520447073955, + "p_tukey": 0.001 + }, + { + "groupA": "+80y", + "groupB": "70-79y", + "meanA": 17.59551074074074, + "meanB": 18.295110504201674, + "diff": -0.6995997634609346, + "se": 0.15765546988566861, + "t_stat": -4.43752293509564, + "p_tukey": 0.001 + }, + { + "groupA": "-50y", + "groupB": "50-59y", + "meanA": 16.960304938271612, + "meanB": 18.874507575757576, + "diff": -1.914202637485964, + 
"se": 0.3659204422826919, + "t_stat": -5.231198961022097, + "p_tukey": 0.001 + }, + { + "groupA": "-50y", + "groupB": "60-69y", + "meanA": 16.960304938271612, + "meanB": 18.41614865591398, + "diff": -1.4558437176423666, + "se": 0.27056873031095097, + "t_stat": -5.38067985893728, + "p_tukey": 0.001 + }, + { + "groupA": "-50y", + "groupB": "70-79y", + "meanA": 16.960304938271612, + "meanB": 18.295110504201674, + "diff": -1.3348055659300613, + "se": 0.2587226381587848, + "t_stat": -5.159214421394607, + "p_tukey": 0.001 + }, + { + "groupA": "50-59y", + "groupB": "60-69y", + "meanA": 18.874507575757576, + "meanB": 18.41614865591398, + "diff": 0.45835891984359733, + "se": 0.29473797784707395, + "t_stat": 1.555140342590729, + "p_tukey": 0.5229453504019976 + }, + { + "groupA": "50-59y", + "groupB": "70-79y", + "meanA": 18.874507575757576, + "meanB": 18.295110504201674, + "diff": 0.5793970715559027, + "se": 0.28390216846502536, + "t_stat": 2.040833554349128, + "p_tukey": 0.24692516522408592 + }, + { + "groupA": "60-69y", + "groupB": "70-79y", + "meanA": 18.41614865591398, + "meanB": 18.295110504201674, + "diff": 0.12103815171230536, + "se": 0.14110318565100827, + "t_stat": 0.8577988594224235, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "edsdcategory" + }, + { + "name": "y", + "value": "rightorifgorbitalpartoftheinferiorfrontalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,ppmi,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 365.0, + "df_explained": 2.0, + "ss_residual": 12.174193462643624, + "ss_explained": 0.6119409595302846, + "ms_residual": 0.03335395469217431, + "ms_explained": 0.3059704797651423, + "p_value": 0.00012969411704519937, + "f_stat": 9.173439329427728, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 1.457164539007092, + "meanB": 1.5451211920529793, + "diff": -0.08795665304588729, + "se": 0.021387846720134822, + "t_stat": -4.112459482098479, + "p_tukey": 0.001 + }, + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 1.457164539007092, + "meanB": 1.530817105263158, + "diff": -0.07365256625606609, + "se": 0.025988857132515367, + "t_stat": -2.8340055847980077, + "p_tukey": 0.013429031416622328 + }, + { + "groupA": "CN", + "groupB": "MCI", + "meanA": 1.5451211920529793, + "meanB": 1.530817105263158, + "diff": 0.014304086789821202, + "se": 0.025685695280909717, + "t_stat": 0.556889219208809, + "p_tukey": 0.8255173214703928 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3851179_a" + }, + { + "name": "y", + "value": "leftporgposteriororbitalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,edsd,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 54.41922229760719, + "ss_explained": 0.2851771326406408, + "ms_residual": 0.061213973338140826, + "ms_explained": 0.1425885663203204, + "p_value": 0.09795328415466364, + "f_stat": 2.329346692342175, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 2.4227461636828647, + "meanB": 2.4588294147582728, + "diff": -0.03608325107540811, + "se": 0.017672527351106807, + "t_stat": -2.0417708434412614, + "p_tukey": 0.10299669110852283 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 2.4227461636828647, + "meanB": 2.4230601851851854, + "diff": -0.0003140215023207382, + "se": 0.026895232402702255, + "t_stat": 
-0.01167573113401271, + "p_tukey": 0.9 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 2.4588294147582728, + "meanB": 2.4230601851851854, + "diff": 0.03576922957308737, + "se": 0.02688041658595925, + "t_stat": 1.3306798820882528, + "p_tukey": 0.37978122788539936 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "ppmicategory" + }, + { + "name": "y", + "value": "leftioginferioroccipitalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,edsd,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 710.0, + "df_explained": 3.0, + "ss_residual": 456.71257323032984, + "ss_explained": 6.555099990917139, + "ms_residual": 0.6432571453948308, + "ms_explained": 2.185033330305713, + "p_value": 0.0175316845678849, + "f_stat": 3.396827141289726, + "tukey_test": [ + { + "groupA": "GENPD", + "groupB": "HC", + "meanA": 6.4476407407407414, + "meanB": 6.548237704918027, + "diff": -0.10059696417728592, + "se": 0.1070350998865331, + "t_stat": -0.9398502386967248, + "p_tukey": 0.7577754016851003 + }, + { + "groupA": "GENPD", + "groupB": "PD", + "meanA": 6.4476407407407414, + "meanB": 6.707214320987652, + "diff": -0.2595735802469106, + "se": 0.0976203641016096, + "t_stat": -2.659010572596611, + "p_tukey": 0.03995195934016549 + }, + { + "groupA": "GENPD", + "groupB": "PRODROMA", + "meanA": 6.4476407407407414, + "meanB": 6.558926666666666, + "diff": -0.1112859259259249, + "se": 0.1491175692882586, + "t_stat": -0.7462965394158116, + "p_tukey": 0.8663429794749147 + }, + { + "groupA": "HC", + "groupB": "PD", + "meanA": 6.548237704918027, + "meanB": 6.707214320987652, + "diff": -0.15897661606962465, + "se": 0.07143777443458212, + "t_stat": -2.225385901617144, + "p_tukey": 0.1175220281763989 + }, + { + "groupA": "HC", + "groupB": "PRODROMA", + "meanA": 6.548237704918027, + "meanB": 6.558926666666666, + "diff": -0.010688961748638981, + "se": 0.1334528740764844, + "t_stat": -0.08009540313468957, + "p_tukey": 0.9 + }, + { + "groupA": "PD", + "groupB": "PRODROMA", + "meanA": 6.707214320987652, + "meanB": 6.558926666666666, + "diff": 0.14828765432098567, + "se": 0.1260273481387461, + "t_stat": 1.1766307592042065, + "p_tukey": 0.6249630013222718 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "neurodegenerativescategories" + }, + { + "name": "y", + "value": "_3rdventricle" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 619.0, + "df_explained": 2.0, + "ss_residual": 156.58740725687295, + "ss_explained": 28.887399936692418, + "ms_residual": 0.25296834774939086, + "ms_explained": 14.443699968346209, + "p_value": 1.7500665819414835e-23, + "f_stat": 57.09686645324974, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 1.8962894326241129, + "meanB": 1.6444811842105265, + "diff": 0.2518082484135864, + "se": 0.07157256910665728, + "t_stat": 3.5182228548809293, + "p_tukey": 0.0013547189187185227 + }, + { + "groupA": "AD", + "groupB": "PD", + "meanA": 1.8962894326241129, + "meanB": 1.3811469876543203, + "diff": 0.5151424449697926, + "se": 0.04918043270862267, + "t_stat": 10.474540718700794, + "p_tukey": 0.001 + }, + { + "groupA": "MCI", + "groupB": "PD", + "meanA": 1.6444811842105265, + "meanB": 1.3811469876543203, + "diff": 0.26333419655620616, + "se": 0.06287403355725509, + "t_stat": 4.1882822153664705, + "p_tukey": 0.001 + } + ] + } + }, + { + 
"input": [ + { + "name": "x", + "value": "rs3764650_g" + }, + { + "name": "y", + "value": "rightioginferioroccipitalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 557.2761859753915, + "ss_explained": 2.4074281370358555, + "ms_residual": 0.6268573520533088, + "ms_explained": 1.2037140685179277, + "p_value": 0.14717981208055692, + "f_stat": 1.920236022717274, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 6.444648633193866, + "meanB": 6.362989084967321, + "diff": 0.0816595482265452, + "se": 0.07050804856540827, + "t_stat": 1.1581592440583859, + "p_tukey": 0.48030881816325754 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 6.444648633193866, + "meanB": 6.700491363636363, + "diff": -0.25584273044249706, + "se": 0.17137034177114688, + "t_stat": -1.492923033229152, + "p_tukey": 0.2953028905781473 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 6.362989084967321, + "meanB": 6.700491363636363, + "diff": -0.33750227866904225, + "se": 0.18052873144900938, + "t_stat": -1.8695211336172841, + "p_tukey": 0.14829643492746591 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "adnicategory" + }, + { + "name": "y", + "value": "fdg" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 738.0, + "df_explained": 2.0, + "ss_residual": 323.69847603662066, + "ss_explained": 119.40717777904058, + "ms_residual": 0.4386158211878329, + "ms_explained": 59.70358888952029, + "p_value": 4.794143832538489e-51, + "f_stat": 136.11818362555786, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 5.349276510067114, + "meanB": 6.522110411764703, + "diff": -1.1728339016975893, + "se": 0.07432242871000169, + "t_stat": -15.780349512983005, + "p_tukey": 0.001 + }, + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 5.349276510067114, + "meanB": 6.211054336492892, + "diff": -0.8617778264257785, + "se": 0.06311183983532667, + "t_stat": -13.654772680916217, + "p_tukey": 0.001 + }, + { + "groupA": "CN", + "groupB": "MCI", + "meanA": 6.522110411764703, + "meanB": 6.211054336492892, + "diff": 0.3110560752718108, + "se": 0.06016200669582261, + "t_stat": 5.170307513918234, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs10498633_t" + }, + { + "name": "y", + "value": "leftpcggposteriorcingulategyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 205.54323901431295, + "ss_explained": 0.31741197389016573, + "ms_residual": 0.23120724298572884, + "ms_explained": 0.15870598694508287, + "p_value": 0.5036399688803118, + "f_stat": 0.6864230760922958, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 4.3257038103756695, + "meanB": 4.352339653979239, + "diff": -0.026635843603569853, + "se": 0.03483724624441458, + "t_stat": -0.7645794795804318, + "p_tukey": 0.7063750475887947 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 4.3257038103756695, + "meanB": 4.400789090909091, + "diff": -0.0750852805334219, + "se": 0.07528823767041633, + "t_stat": -0.9973042650050742, + "p_tukey": 0.5732304432310297 + }, + { + 
"groupA": "1", + "groupB": "2", + "meanA": 4.352339653979239, + "meanB": 4.400789090909091, + "diff": -0.04844943692985204, + "se": 0.07781217854611082, + "t_stat": -0.6226459384007778, + "p_tukey": 0.7875790274949619 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "edsdcategory" + }, + { + "name": "y", + "value": "leftangangulargyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,ppmi,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 365.0, + "df_explained": 2.0, + "ss_residual": 365.0148917625786, + "ss_explained": 31.6848247545679, + "ms_residual": 1.0000407993495304, + "ms_explained": 15.84241237728395, + "p_value": 2.5257272769588954e-07, + "f_stat": 15.84176604353396, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 8.453389361702126, + "meanB": 9.08145033112583, + "diff": -0.6280609694237036, + "se": 0.11711223116868832, + "t_stat": -5.36289816320761, + "p_tukey": 0.001 + }, + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 8.453389361702126, + "meanB": 8.99833421052632, + "diff": -0.5449448488241941, + "se": 0.1423057254963331, + "t_stat": -3.829395106370728, + "p_tukey": 0.001 + }, + { + "groupA": "CN", + "groupB": "MCI", + "meanA": 9.08145033112583, + "meanB": 8.99833421052632, + "diff": 0.08311612059950946, + "se": 0.14064571917071525, + "t_stat": 0.5909608987005386, + "p_tukey": 0.8060230544269805 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "edsdcategory" + }, + { + "name": "y", + "value": "leftscasubcallosalarea" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 365.0, + "df_explained": 2.0, + "ss_residual": 7.70291143620262, + "ss_explained": 0.2003501666658586, + "ms_residual": 0.02110386694850033, + "ms_explained": 0.1001750833329293, + "p_value": 0.009222737141930138, + "f_stat": 4.746764352589319, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 1.196173971631206, + "meanB": 1.226122052980133, + "diff": -0.029948081348927014, + "se": 0.017012745913161847, + "t_stat": -1.7603320182286266, + "p_tukey": 0.18467464557017998 + }, + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 1.196173971631206, + "meanB": 1.258967105263158, + "diff": -0.06279313363195183, + "se": 0.020672573015623374, + "t_stat": -3.0375093407335254, + "p_tukey": 0.007200195997449144 + }, + { + "groupA": "CN", + "groupB": "MCI", + "meanA": 1.226122052980133, + "meanB": 1.258967105263158, + "diff": -0.03284505228302481, + "se": 0.02043142599323167, + "t_stat": -1.6075751293084197, + "p_tukey": 0.24396957652542495 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3851179_a" + }, + { + "name": "y", + "value": "rightptplanumtemporale" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 53.5890921108736, + "ss_explained": 0.3384955063223536, + "ms_residual": 0.06028019360053274, + "ms_explained": 0.1692477531611768, + "p_value": 0.06087978510854993, + "f_stat": 2.807684299800939, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 1.8610209462915601, + "meanB": 1.8958427480916018, + "diff": -0.034821801800041685, + "se": 0.01753721784197218, + "t_stat": -1.9855944149078173, + "p_tukey": 
0.11646103675515573 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 1.8610209462915601, + "meanB": 1.845883611111111, + "diff": 0.01513733518044913, + "se": 0.026689309354902106, + "t_stat": 0.5671684860465979, + "p_tukey": 0.8193177985136681 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 1.8958427480916018, + "meanB": 1.845883611111111, + "diff": 0.049959136980490815, + "se": 0.026674606975295233, + "t_stat": 1.8729099561527043, + "p_tukey": 0.14727367968823213 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs17125944_c" + }, + { + "name": "y", + "value": "rightententorhinalarea" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 885.0, + "df_explained": 2.0, + "ss_residual": 46.92486440565715, + "ss_explained": 0.2094996837705222, + "ms_residual": 0.05302244565610977, + "ms_explained": 0.1047498418852611, + "p_value": 0.13929260637354718, + "f_stat": 1.9755754490210087, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 1.5776727783783786, + "meanB": 1.5459546808510647, + "diff": 0.031718097527313915, + "se": 0.021158867348288198, + "t_stat": 1.4990451523332595, + "p_tukey": 0.2923681232149966 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 1.5776727783783786, + "meanB": 1.686657142857143, + "diff": -0.10898436447876447, + "se": 0.08744305030886848, + "t_stat": -1.2463467833499315, + "p_tukey": 0.42804663996623415 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 1.5459546808510647, + "meanB": 1.686657142857143, + "diff": -0.1407024620060784, + "se": 0.08916659019836458, + "t_stat": -1.5779728897680674, + "p_tukey": 0.25604346049184157 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs2718058_g" + }, + { + "name": "y", + "value": "rightmsfgsuperiorfrontalgyrusmedialsegment" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,ppmi,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 549.0, + "df_explained": 2.0, + "ss_residual": 523.1018834375674, + "ss_explained": 2.903890399980491, + "ms_residual": 0.9528267457879187, + "ms_explained": 1.4519451999902455, + "p_value": 0.21879607210458207, + "f_stat": 1.5238291813372555, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 7.800898546255505, + "meanB": 7.883345079999998, + "diff": -0.08244653374449307, + "se": 0.0894917964631383, + "t_stat": -0.9212747648713568, + "p_tukey": 0.6168695986431032 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 7.800898546255505, + "meanB": 8.023909866666667, + "diff": -0.22301132041116212, + "se": 0.13000704312287734, + "t_stat": -1.715378759905961, + "p_tukey": 0.2005117013951936 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 7.883345079999998, + "meanB": 8.023909866666667, + "diff": -0.14056478666666905, + "se": 0.1285132817791891, + "t_stat": -1.0937763375165128, + "p_tukey": 0.5181744190212358 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "apoe4" + }, + { + "name": "y", + "value": "leftporgposteriororbitalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1058.0, + "df_explained": 2.0, + "ss_residual": 67.02745972482711, + "ss_explained": 0.1438220558650695, + "ms_residual": 0.06335298650739803, + "ms_explained": 0.07191102793253475, 
+ "p_value": 0.32178585413651856, + "f_stat": 1.1350850511859825, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 2.4391625276752777, + "meanB": 2.4146344050632926, + "diff": 0.024528122611985115, + "se": 0.01665156859260127, + "t_stat": 1.4730217442027418, + "p_tukey": 0.30488487278953225 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 2.4391625276752777, + "meanB": 2.421202096774193, + "diff": 0.017960430901084568, + "se": 0.025055909691961167, + "t_stat": 0.7168141616844556, + "p_tukey": 0.7336674729607788 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 2.4146344050632926, + "meanB": 2.421202096774193, + "diff": -0.006567691710900547, + "se": 0.02590942857580885, + "t_stat": -0.2534865518814521, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs1476679_c" + }, + { + "name": "y", + "value": "leftofugoccipitalfusiformgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 532.0, + "df_explained": 2.0, + "ss_residual": 109.90675107247071, + "ss_explained": 0.1998816964930362, + "ms_residual": 0.20659163735426825, + "ms_explained": 0.0999408482465181, + "p_value": 0.6167318119959351, + "f_stat": 0.48376037639479647, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 4.175223226950354, + "meanB": 4.213708786407769, + "diff": -0.038485559457414276, + "se": 0.04165893043147738, + "t_stat": -0.9238249532286285, + "p_tukey": 0.6154195842998063 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 4.175223226950354, + "meanB": 4.168023829787234, + "diff": 0.007199397163120125, + "se": 0.07161118076754182, + "t_stat": 0.10053454063954344, + "p_tukey": 0.9 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 4.213708786407769, + "meanB": 4.168023829787234, + "diff": 0.0456849566205344, + "se": 0.07347406874680391, + "t_stat": 0.6217834046725726, + "p_tukey": 0.7882269152871312 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3818361_t" + }, + { + "name": "y", + "value": "rightlorglateralorbitalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 52.83220212968574, + "ss_explained": 0.10019739541651042, + "ms_residual": 0.05942879879604695, + "ms_explained": 0.05009869770825521, + "p_value": 0.43075950856364054, + "f_stat": 0.8430037073471465, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 2.198593373913042, + "meanB": 2.2208305693950168, + "diff": -0.02223719548197467, + "se": 0.01774386710565996, + "t_stat": -1.2532327563973595, + "p_tukey": 0.424027206096247 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 2.198593373913042, + "meanB": 2.1918027777777773, + "diff": 0.0067905961352647815, + "se": 0.04188262646068936, + "t_stat": 0.1621339612413842, + "p_tukey": 0.9 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 2.2208305693950168, + "meanB": 2.1918027777777773, + "diff": 0.02902779161723945, + "se": 0.043154262289273396, + "t_stat": 0.6726517863440507, + "p_tukey": 0.7589684676967778 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "ppmicategory" + }, + { + "name": "y", + "value": "rightsmcsupplementarymotorcortex" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": 
"ppmi,edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 710.0, + "df_explained": 3.0, + "ss_residual": 285.18193715051365, + "ss_explained": 3.662360286125364, + "ms_residual": 0.4016647002119911, + "ms_explained": 1.220786762041788, + "p_value": 0.028422605309377468, + "f_stat": 3.039318021716819, + "tukey_test": [ + { + "groupA": "GENPD", + "groupB": "HC", + "meanA": 5.274814814814812, + "meanB": 5.37234699453552, + "diff": -0.09753217972070782, + "se": 0.08457962243710934, + "t_stat": -1.1531404008480834, + "p_tukey": 0.6381381050014986 + }, + { + "groupA": "GENPD", + "groupB": "PD", + "meanA": 5.274814814814812, + "meanB": 5.4823, + "diff": -0.20748518518518821, + "se": 0.07714005542705267, + "t_stat": -2.689720457634311, + "p_tukey": 0.03672330049870853 + }, + { + "groupA": "GENPD", + "groupB": "PRODROMA", + "meanA": 5.274814814814812, + "meanB": 5.432528888888891, + "diff": -0.15771407407407878, + "se": 0.11783338103585263, + "t_stat": -1.3384498746250169, + "p_tukey": 0.5341978834857641 + }, + { + "groupA": "HC", + "groupB": "PD", + "meanA": 5.37234699453552, + "meanB": 5.4823, + "diff": -0.10995300546448039, + "se": 0.05645045406441061, + "t_stat": -1.947778937951903, + "p_tukey": 0.20923993811606012 + }, + { + "groupA": "HC", + "groupB": "PRODROMA", + "meanA": 5.37234699453552, + "meanB": 5.432528888888891, + "diff": -0.06018189435337096, + "se": 0.10545506767875032, + "t_stat": -0.5706875513721555, + "p_tukey": 0.9 + }, + { + "groupA": "PD", + "groupB": "PRODROMA", + "meanA": 5.4823, + "meanB": 5.432528888888891, + "diff": 0.049771111111109434, + "se": 0.09958738333149735, + "t_stat": 0.4997732588819602, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3818361_t" + }, + { + "name": "y", + "value": "rightaccumbensarea" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 2.0161921395318343, + "ss_explained": 0.006765346474643916, + "ms_residual": 0.00226793266539014, + "ms_explained": 0.003382673237321958, + "p_value": 0.22559228493920966, + "f_stat": 1.4915227814931866, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 0.3727083530434781, + "meanB": 0.37816232384341614, + "diff": -0.005453970799938035, + "se": 0.0034662898717065936, + "t_stat": -1.5734318253230293, + "p_tukey": 0.25805032732497 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 0.3727083530434781, + "meanB": 0.3687275277777778, + "diff": 0.003980825265700316, + "se": 0.008181831110245933, + "t_stat": 0.48654454144320014, + "p_tukey": 0.8654414705675695 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 0.37816232384341614, + "meanB": 0.3687275277777778, + "diff": 0.009434796065638351, + "se": 0.008430246992019191, + "t_stat": 1.119160099884399, + "p_tukey": 0.5035165569021258 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "parkinsonbroadcategory" + }, + { + "name": "y", + "value": "leftopifgopercularpartoftheinferiorfrontalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 711.0, + "df_explained": 2.0, + "ss_residual": 106.13404432166016, + "ss_explained": 0.33472673144891674, + "ms_residual": 0.1492743239404503, + "ms_explained": 0.16736336572445837, + 
"p_value": 0.3264705365996791, + "f_stat": 1.1211798607188757, + "tukey_test": [ + { + "groupA": "CN", + "groupB": "Other", + "meanA": 3.340643169398909, + "meanB": 3.31910634920635, + "diff": 0.02153682019255898, + "se": 0.044726094674914584, + "t_stat": 0.4815269553287934, + "p_tukey": 0.8683732240611466 + }, + { + "groupA": "CN", + "groupB": "PD", + "meanA": 3.340643169398909, + "meanB": 3.37324074074074, + "diff": -0.03259757134183072, + "se": 0.03441344600341447, + "t_stat": -0.9472335708140481, + "p_tukey": 0.6019355949286653 + }, + { + "groupA": "Other", + "groupB": "PD", + "meanA": 3.31910634920635, + "meanB": 3.37324074074074, + "diff": -0.0541343915343897, + "se": 0.0394118692058748, + "t_stat": -1.3735555462139903, + "p_tukey": 0.35631925362793426 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "alzheimerbroadcategory" + }, + { + "name": "y", + "value": "leftpogpostcentralgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1777.0, + "df_explained": 2.0, + "ss_residual": 2738.6739859320533, + "ss_explained": 91.14314686053886, + "ms_residual": 1.5411783826291803, + "ms_explained": 45.57157343026943, + "p_value": 2.3295702889862067e-13, + "f_stat": 29.56930485394325, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 11.134369729729718, + "meanB": 11.558140634328357, + "diff": -0.42377090459863886, + "se": 0.11266281806178739, + "t_stat": -3.7614087050994165, + "p_tukey": 0.001 + }, + { + "groupA": "AD", + "groupB": "Other", + "meanA": 11.134369729729718, + "meanB": 11.808025666666662, + "diff": -0.6736559369369441, + "se": 0.09020508240291593, + "t_stat": -7.468048573227264, + "p_tukey": 0.001 + }, + { + "groupA": "CN", + "groupB": "Other", + "meanA": 11.558140634328357, + "meanB": 11.808025666666662, + "diff": -0.24988503233830528, + "se": 0.08333893166668291, + "t_stat": -2.9984189542737307, + "p_tukey": 0.0077482262589329 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "edsdcategory" + }, + { + "name": "y", + "value": "_3rdventricle" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 365.0, + "df_explained": 2.0, + "ss_residual": 87.86809964950699, + "ss_explained": 12.917964439393067, + "ms_residual": 0.24073451958769038, + "ms_explained": 6.458982219696534, + "p_value": 1.344657247150668e-11, + "f_stat": 26.83031179225534, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 1.8962894326241129, + "meanB": 1.476567019867549, + "diff": 0.41972241275656397, + "se": 0.057459597631664174, + "t_stat": 7.304652835321425, + "p_tukey": 0.001 + }, + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 1.8962894326241129, + "meanB": 1.6444811842105265, + "diff": 0.2518082484135864, + "se": 0.069820458940138, + "t_stat": 3.6065109315520165, + "p_tukey": 0.0010293966015694211 + }, + { + "groupA": "CN", + "groupB": "MCI", + "meanA": 1.476567019867549, + "meanB": 1.6444811842105265, + "diff": -0.16791416434297757, + "se": 0.0690059983617324, + "t_stat": -2.4333270777819096, + "p_tukey": 0.040817498682733766 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs1476679_c" + }, + { + "name": "y", + "value": "leftmpogpostcentralgyrusmedialsegment" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + 
"value": "adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 532.0, + "df_explained": 2.0, + "ss_residual": 12.377880050130488, + "ss_explained": 0.015518446617523683, + "ms_residual": 0.023266691823553547, + "ms_explained": 0.007759223308761842, + "p_value": 0.7165682836878291, + "f_stat": 0.33349061257204404, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 1.1333235602836877, + "meanB": 1.1438496359223305, + "diff": -0.010526075638642718, + "se": 0.013980382874366627, + "t_stat": -0.7529175512025879, + "p_tukey": 0.7131994962208488 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 1.1333235602836877, + "meanB": 1.1451374042553195, + "diff": -0.011813843971631766, + "se": 0.024032103437279965, + "t_stat": -0.49158593222869784, + "p_tukey": 0.8627163126073474 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 1.1438496359223305, + "meanB": 1.1451374042553195, + "diff": -0.0012877683329890477, + "se": 0.024657272805105605, + "t_stat": -0.05222671392605912, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs17125944_c" + }, + { + "name": "y", + "value": "subjectageyears" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 885.0, + "df_explained": 2.0, + "ss_residual": 47276.190992360134, + "ss_explained": 4.007205838058221, + "ms_residual": 53.41942485012444, + "ms_explained": 2.0036029190291105, + "p_value": 0.9631891964212832, + "f_stat": 0.0375070103178852, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 73.4554054054054, + "meanB": 73.48936170212765, + "diff": -0.03395629672225198, + "se": 0.6716022491843268, + "t_stat": -0.05056012954615998, + "p_tukey": 0.9 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 73.4554054054054, + "meanB": 72.71428571428571, + "diff": 0.7411196911196924, + "se": 2.77552424221448, + "t_stat": 0.267019714635381, + "p_tukey": 0.9 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 73.48936170212765, + "meanB": 72.71428571428571, + "diff": 0.7750759878419444, + "se": 2.830231011120904, + "t_stat": 0.27385608623339125, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs610932_a" + }, + { + "name": "y", + "value": "leftpoparietaloperculum" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 886.0, + "df_explained": 2.0, + "ss_residual": 87.45791140384064, + "ss_explained": 0.36680612783156985, + "ms_residual": 0.09871096095241608, + "ms_explained": 0.18340306391578493, + "p_value": 0.15659454507175383, + "f_stat": 1.8579807363459357, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 2.38036210526316, + "meanB": 2.385406928104575, + "diff": -0.005044822841415275, + "se": 0.02369410915744737, + "t_stat": -0.21291464506609742, + "p_tukey": 0.9 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 2.38036210526316, + "meanB": 2.328831586206897, + "diff": 0.051530519056262936, + "se": 0.03204870507373961, + "t_stat": 1.6078814709579805, + "p_tukey": 0.24306186811061825 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 2.385406928104575, + "meanB": 2.328831586206897, + "diff": 0.05657534189767821, + "se": 0.029930282453676953, + "t_stat": 1.8902374872418852, + "p_tukey": 0.14212962079374392 + } + ] + } + }, + { 
+ "input": [ + { + "name": "x", + "value": "alzheimerbroadcategory" + }, + { + "name": "y", + "value": "righttmptemporalpole" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1431.0, + "df_explained": 2.0, + "ss_residual": 1000.5292005062038, + "ss_explained": 85.10714183317279, + "ms_residual": 0.6991818312412326, + "ms_explained": 42.553570916586395, + "p_value": 4.287755880460396e-26, + "f_stat": 60.86195180593088, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 7.095318622589532, + "meanB": 7.711043460620525, + "diff": -0.6157248380309932, + "se": 0.059956754986911054, + "t_stat": -10.269482365505104, + "p_tukey": 0.001 + }, + { + "groupA": "AD", + "groupB": "Other", + "meanA": 7.095318622589532, + "meanB": 7.604050950920241, + "diff": -0.5087323283307095, + "se": 0.05475842874903741, + "t_stat": -9.290484404917342, + "p_tukey": 0.001 + }, + { + "groupA": "CN", + "groupB": "Other", + "meanA": 7.711043460620525, + "meanB": 7.604050950920241, + "diff": 0.10699250970028373, + "se": 0.052355099972990586, + "t_stat": 2.0435928831284818, + "p_tukey": 0.1023572480116679 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs1476679_c" + }, + { + "name": "y", + "value": "rightputamen" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 532.0, + "df_explained": 2.0, + "ss_residual": 86.89145598752515, + "ss_explained": 0.03671799035519957, + "ms_residual": 0.16332980448782922, + "ms_explained": 0.018358995177599785, + "p_value": 0.8937039623549486, + "f_stat": 0.112404439809195, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 3.7338244680851056, + "meanB": 3.7194526699029105, + "diff": 0.014371798182195139, + "se": 0.03704115052801743, + "t_stat": 0.38799545849215733, + "p_tukey": 0.9 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 3.7338244680851056, + "meanB": 3.7445787234042553, + "diff": -0.010754255319149753, + "se": 0.06367327482549373, + "t_stat": -0.1688974746253184, + "p_tukey": 0.9 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 3.7194526699029105, + "meanB": 3.7445787234042553, + "diff": -0.02512605350134489, + "se": 0.06532966670454549, + "t_stat": -0.3846040362486141, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3865444_t" + }, + { + "name": "y", + "value": "leftsmcsupplementarymotorcortex" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 321.30152483097896, + "ss_explained": 0.7793852317263561, + "ms_residual": 0.36141903805509445, + "ms_explained": 0.38969261586317805, + "p_value": 0.3406418225793246, + "f_stat": 1.0782293538277128, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 5.282520803571428, + "meanB": 5.249662365591397, + "diff": 0.03285843798003096, + "se": 0.042169840859809585, + "t_stat": 0.7791928380585151, + "p_tukey": 0.6980143445543423 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 5.282520803571428, + "meanB": 5.359592500000001, + "diff": -0.0770716964285727, + "se": 0.07633117178523093, + "t_stat": -1.0097014709197096, + "p_tukey": 0.566136993308943 + }, + { + "groupA": 
"1", + "groupB": "2", + "meanA": 5.249662365591397, + "meanB": 5.359592500000001, + "diff": -0.10993013440860366, + "se": 0.07740326489100997, + "t_stat": -1.4202260662181905, + "p_tukey": 0.33146518334359554 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3865444_t" + }, + { + "name": "y", + "value": "leftcuncuneus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 317.59143870806616, + "ss_explained": 0.07809874150181968, + "ms_residual": 0.35724571283247036, + "ms_explained": 0.03904937075090984, + "p_value": 0.8964674265479956, + "f_stat": 0.10930675820096394, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 4.462184129464288, + "meanB": 4.4526730107526875, + "diff": 0.009511118711600375, + "se": 0.04192566521673332, + "t_stat": 0.2268567156283142, + "p_tukey": 0.9 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 4.462184129464288, + "meanB": 4.487905555555554, + "diff": -0.025721426091266153, + "se": 0.07588919210075962, + "t_stat": -0.3389339822871654, + "p_tukey": 0.9 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 4.4526730107526875, + "meanB": 4.487905555555554, + "diff": -0.03523254480286653, + "se": 0.07695507747565318, + "t_stat": -0.4578326207781845, + "p_tukey": 0.8818689854444391 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3865444_t" + }, + { + "name": "y", + "value": "rightlorglateralorbitalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 52.816948125013724, + "ss_explained": 0.11545140008852281, + "ms_residual": 0.05941164018561724, + "ms_explained": 0.057725700044261405, + "p_value": 0.3788699203630247, + "f_stat": 0.9716227302244388, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 2.202644285714284, + "meanB": 2.201136236559141, + "diff": 0.0015080491551429098, + "se": 0.01709748456492526, + "t_stat": 0.08820298386094798, + "p_tukey": 0.9 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 2.202644285714284, + "meanB": 2.2436411111111108, + "diff": -0.0409968253968267, + "se": 0.030947971460439067, + "t_stat": -1.3247015381681195, + "p_tukey": 0.3831261550455808 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 2.201136236559141, + "meanB": 2.2436411111111108, + "diff": -0.04250487455196961, + "se": 0.03138264455747913, + "t_stat": -1.3544070345671306, + "p_tukey": 0.36664821891775623 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "alzheimerbroadcategory" + }, + { + "name": "y", + "value": "leftpallidum" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1063.0, + "df_explained": 2.0, + "ss_residual": 32.797417210554, + "ss_explained": 0.1464033874532077, + "ms_residual": 0.03085363801557291, + "ms_explained": 0.07320169372660384, + "p_value": 0.09373656519656515, + "f_stat": 2.3725465920633537, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 1.4964244594594598, + "meanB": 1.495647611940298, + "diff": 0.0007768475191618496, + "se": 0.015940698429020624, + "t_stat": 0.04873359361391407, + "p_tukey": 0.9 + }, + { + "groupA": 
"AD", + "groupB": "Other", + "meanA": 1.4964244594594598, + "meanB": 1.519508680555556, + "diff": -0.023084221096096202, + "se": 0.0138760835684474, + "t_stat": -1.6635977278622793, + "p_tukey": 0.22007388483862567 + }, + { + "groupA": "CN", + "groupB": "Other", + "meanA": 1.495647611940298, + "meanB": 1.519508680555556, + "diff": -0.023861068615258052, + "se": 0.012988104509916654, + "t_stat": -1.8371478761230857, + "p_tukey": 0.15821438510460983 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs1476679_c" + }, + { + "name": "y", + "value": "rightporgposteriororbitalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 532.0, + "df_explained": 2.0, + "ss_residual": 26.93575551458052, + "ss_explained": 0.02221892461948333, + "ms_residual": 0.05063111938830925, + "ms_explained": 0.011109462309741665, + "p_value": 0.8030573097851263, + "f_stat": 0.21941964633527036, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 2.291973971631205, + "meanB": 2.2953316990291257, + "diff": -0.00335772739792084, + "se": 0.020623411684472712, + "t_stat": -0.16281144212666132, + "p_tukey": 0.9 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 2.291973971631205, + "meanB": 2.315458723404255, + "diff": -0.023484751773050316, + "se": 0.0354513869387365, + "t_stat": -0.6624494498236215, + "p_tukey": 0.7649618139885163 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 2.2953316990291257, + "meanB": 2.315458723404255, + "diff": -0.020127024375129476, + "se": 0.03637361670605064, + "t_stat": -0.5533413005856359, + "p_tukey": 0.8273828736313966 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "parkinsonbroadcategory" + }, + { + "name": "y", + "value": "leftsmgsupramarginalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 711.0, + "df_explained": 2.0, + "ss_residual": 687.8926525159012, + "ss_explained": 18.0702263477928, + "ms_residual": 0.9675002145090031, + "ms_explained": 9.0351131738964, + "p_value": 9.923051990533985e-05, + "f_stat": 9.338616197084392, + "tukey_test": [ + { + "groupA": "CN", + "groupB": "Other", + "meanA": 8.953214207650268, + "meanB": 8.862757936507933, + "diff": 0.0904562711423349, + "se": 0.11386597186651427, + "t_stat": 0.7944100389217008, + "p_tukey": 0.6893660266345324 + }, + { + "groupA": "CN", + "groupB": "PD", + "meanA": 8.953214207650268, + "meanB": 9.231945925925928, + "diff": -0.2787317182756599, + "se": 0.08761150516126712, + "t_stat": -3.1814510863909535, + "p_tukey": 0.004357928682962586 + }, + { + "groupA": "Other", + "groupB": "PD", + "meanA": 8.862757936507933, + "meanB": 9.231945925925928, + "diff": -0.3691879894179948, + "se": 0.10033674575929091, + "t_stat": -3.6794893697637088, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "gender" + }, + { + "name": "y", + "value": "leftinflatvent" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 712.0, + "df_explained": 1.0, + "ss_residual": 40.32051651548807, + "ss_explained": 1.881017462368508, + "ms_residual": 0.05662993892624729, + "ms_explained": 1.881017462368508, + "p_value": 1.2281952302939099e-08, + "f_stat": 
33.21595428203224, + "tukey_test": [ + { + "groupA": "F", + "groupB": "M", + "meanA": 0.4891526562499998, + "meanB": 0.5961795196506552, + "diff": -0.10702686340065537, + "se": 0.018570321854815962, + "t_stat": -5.763328403104624, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3764650_g" + }, + { + "name": "y", + "value": "leftofugoccipitalfusiformgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 200.73221480599727, + "ss_explained": 0.45196909299432203, + "ms_residual": 0.2257955172170948, + "ms_explained": 0.22598454649716102, + "p_value": 0.3679853633544565, + "f_stat": 1.000837170207788, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 4.109188186889818, + "meanB": 4.049341503267974, + "diff": 0.059846683621843866, + "se": 0.04231671931499137, + "t_stat": 1.4142562228504851, + "p_tukey": 0.33457378488799183 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 4.109188186889818, + "meanB": 4.09462, + "diff": 0.01456818688981798, + "se": 0.10285110422417165, + "t_stat": 0.1416434660542441, + "p_tukey": 0.9 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 4.049341503267974, + "meanB": 4.09462, + "diff": -0.045278496732025886, + "se": 0.10834768246255391, + "t_stat": -0.41790000213132944, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs11767557_c" + }, + { + "name": "y", + "value": "rightthalamusproper" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 882.0, + "df_explained": 2.0, + "ss_residual": 380.8183839978608, + "ss_explained": 1.598124601345213, + "ms_residual": 0.4317668752810213, + "ms_explained": 0.7990623006726065, + "p_value": 0.15773994098330943, + "f_stat": 1.8506799535108525, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 6.62811694915254, + "meanB": 6.682099019607845, + "diff": -0.05398207045530512, + "se": 0.049244407760300715, + "t_stat": -1.0962071209804205, + "p_tukey": 0.516648379891875 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 6.62811694915254, + "meanB": 6.476302249999999, + "diff": 0.1518146991525411, + "se": 0.10735911766223541, + "t_stat": 1.4140829624752342, + "p_tukey": 0.3346668164266895 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 6.682099019607845, + "meanB": 6.476302249999999, + "diff": 0.2057967696078462, + "se": 0.11174692530222086, + "t_stat": 1.8416325017557884, + "p_tukey": 0.1569041090652249 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "neurodegenerativescategories" + }, + { + "name": "y", + "value": "leftpallidum" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1417.0, + "df_explained": 2.0, + "ss_residual": 44.104498761840894, + "ss_explained": 0.4448310926586538, + "ms_residual": 0.0311252637698242, + "ms_explained": 0.2224155463293269, + "p_value": 0.0008168744744670828, + "f_stat": 7.1458204490770525, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 1.4943744628099174, + "meanB": 1.5227006134969325, + "diff": -0.02832615068701516, + "se": 0.011553469734578508, + "t_stat": -2.4517440507276795, 
+ "p_tukey": 0.03810367806589976 + }, + { + "groupA": "AD", + "groupB": "PD", + "meanA": 1.4943744628099174, + "meanB": 1.482732098765432, + "diff": 0.011642364044485376, + "se": 0.012751354101698186, + "t_stat": 0.9130296242761294, + "p_tukey": 0.6213592805070274 + }, + { + "groupA": "MCI", + "groupB": "PD", + "meanA": 1.5227006134969325, + "meanB": 1.482732098765432, + "diff": 0.03996851473150054, + "se": 0.011162017638028675, + "t_stat": 3.580760757385739, + "p_tukey": 0.0010300335570025965 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "apoe4" + }, + { + "name": "y", + "value": "rightcalccalcarinecortex" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1058.0, + "df_explained": 2.0, + "ss_residual": 247.34901512236823, + "ss_explained": 0.29765762463324175, + "ms_residual": 0.23378923924609474, + "ms_explained": 0.14882881231662087, + "p_value": 0.529294014465502, + "f_stat": 0.6365939373281353, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 3.2344129704797067, + "meanB": 3.2453055443037973, + "diff": -0.01089257382409059, + "se": 0.03198776252922215, + "t_stat": -0.3405231551953586, + "p_tukey": 0.9 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 3.2344129704797067, + "meanB": 3.2887097580645164, + "diff": -0.054296787584809714, + "se": 0.04813255188080054, + "t_stat": -1.1280679179295292, + "p_tukey": 0.4983053075677233 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 3.2453055443037973, + "meanB": 3.2887097580645164, + "diff": -0.043404213760719124, + "se": 0.049772166744643305, + "t_stat": -0.8720579512522523, + "p_tukey": 0.6448467408421823 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "neurodegenerativescategories" + }, + { + "name": "y", + "value": "leftsplsuperiorparietallobule" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1200.0, + "df_explained": 2.0, + "ss_residual": 1757.487807702761, + "ss_explained": 125.25923028144202, + "ms_residual": 1.4645731730856342, + "ms_explained": 62.62961514072101, + "p_value": 1.1487054908332896e-18, + "f_stat": 42.76304952982983, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 10.069746666666664, + "meanB": 10.641410295138899, + "diff": -0.5716636284722352, + "se": 0.09560250108741884, + "t_stat": -5.979588629689786, + "p_tukey": 0.001 + }, + { + "groupA": "AD", + "groupB": "PD", + "meanA": 10.069746666666664, + "meanB": 11.003265925925925, + "diff": -0.9335192592592616, + "se": 0.10106139971493695, + "t_stat": -9.237149513982901, + "p_tukey": 0.001 + }, + { + "groupA": "MCI", + "groupB": "PD", + "meanA": 10.641410295138899, + "meanB": 11.003265925925925, + "diff": -0.3618556307870264, + "se": 0.0784786073769839, + "t_stat": -4.610882416004122, + "p_tukey": 0.001 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs3818361_t" + }, + { + "name": "y", + "value": "rightmorgmedialorbitalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,adni,ppmi" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 889.0, + "df_explained": 2.0, + "ss_residual": 124.37703345354062, + "ss_explained": 0.22137074984712274, + "ms_residual": 0.13990667430094558, + "ms_explained": 0.11068537492356137, + 
"p_value": 0.45364787414268615, + "f_stat": 0.791137202543119, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 3.6878029391304334, + "meanB": 3.7207497153024898, + "diff": -0.032946776172056325, + "se": 0.027225070724424705, + "t_stat": -1.2101631068491017, + "p_tukey": 0.44933514422367404 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 3.6878029391304334, + "meanB": 3.6767786111111116, + "diff": 0.01102432801932185, + "se": 0.06426206084203644, + "t_stat": 0.17155266847760953, + "p_tukey": 0.9 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 3.7207497153024898, + "meanB": 3.6767786111111116, + "diff": 0.043971104191378174, + "se": 0.06621317866560655, + "t_stat": 0.6640838738983288, + "p_tukey": 0.7638695105968875 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs190982_g" + }, + { + "name": "y", + "value": "rightangangulargyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 533.0, + "df_explained": 2.0, + "ss_residual": 639.8150526325799, + "ss_explained": 0.40558886037972813, + "ms_residual": 1.2004034758584987, + "ms_explained": 0.20279443018986407, + "p_value": 0.8446060000358199, + "f_stat": 0.16893855630068927, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 10.179309541284399, + "meanB": 10.143750445344125, + "diff": 0.03555909594027362, + "se": 0.10181537734200742, + "t_stat": 0.34925074059124955, + "p_tukey": 0.9 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 10.179309541284399, + "meanB": 10.095555070422538, + "diff": 0.0837544708618605, + "se": 0.14971148643951238, + "t_stat": 0.5594391776725806, + "p_tukey": 0.8238942529688638 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 10.143750445344125, + "meanB": 10.095555070422538, + "diff": 0.048195374921586875, + "se": 0.1475365185637655, + "t_stat": 0.3266674270936979, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "alzheimerbroadcategory" + }, + { + "name": "y", + "value": "righttmptemporalpole" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 2145.0, + "df_explained": 2.0, + "ss_residual": 1484.9559203488136, + "ss_explained": 136.7817158243313, + "ms_residual": 0.6922871423537592, + "ms_explained": 68.39085791216564, + "p_value": 9.09089461661994e-42, + "f_stat": 98.78972716384477, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 7.095318622589532, + "meanB": 7.711043460620525, + "diff": -0.6157248380309932, + "se": 0.05966040337207155, + "t_stat": -10.320494050149662, + "p_tukey": 0.001 + }, + { + "groupA": "AD", + "groupB": "Other", + "meanA": 7.095318622589532, + "meanB": 7.7812780527086325, + "diff": -0.6859594301191008, + "se": 0.04913171990087835, + "t_stat": -13.961640901295572, + "p_tukey": 0.001 + }, + { + "groupA": "CN", + "groupB": "Other", + "meanA": 7.711043460620525, + "meanB": 7.7812780527086325, + "diff": -0.07023459208810756, + "se": 0.0464654215852202, + "t_stat": -1.5115453533396521, + "p_tukey": 0.2861309243635789 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "rs744373_c" + }, + { + "name": "y", + "value": "leftgregyrusrectus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni" + }, + { + "name": "filter", + "value": "" + } + 
], + "output": { + "df_residual": 835.0, + "df_explained": 2.0, + "ss_residual": 42.33806454626433, + "ss_explained": 0.20665175326931665, + "ms_residual": 0.05070426891768183, + "ms_explained": 0.10332587663465832, + "p_value": 0.1309608315701658, + "f_stat": 2.037814149384689, + "tukey_test": [ + { + "groupA": "0", + "groupB": "1", + "meanA": 2.0646141849148414, + "meanB": 2.04929111445783, + "diff": 0.015323070457011312, + "se": 0.01661600892073321, + "t_stat": 0.9221871828614278, + "p_tukey": 0.6162216020657083 + }, + { + "groupA": "0", + "groupB": "2", + "meanA": 2.0646141849148414, + "meanB": 2.013705052631578, + "diff": 0.050909132283263325, + "se": 0.025633907181631853, + "t_stat": 1.986007514287272, + "p_tukey": 0.11639462091980424 + }, + { + "groupA": "1", + "groupB": "2", + "meanA": 2.04929111445783, + "meanB": 2.013705052631578, + "diff": 0.03558606182625201, + "se": 0.02620024519086027, + "t_stat": 1.3582339236529706, + "p_tukey": 0.36457703643646056 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "ppmicategory" + }, + { + "name": "y", + "value": "leftliglingualgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "adni,ppmi,edsd" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 710.0, + "df_explained": 3.0, + "ss_residual": 413.68145606511143, + "ss_explained": 4.90342253959502, + "ms_residual": 0.5826499381198753, + "ms_explained": 1.6344741798650066, + "p_value": 0.03891564000230695, + "f_stat": 2.8052421753260828, + "tukey_test": [ + { + "groupA": "GENPD", + "groupB": "HC", + "meanA": 7.479967901234566, + "meanB": 7.619655191256834, + "diff": -0.13968729002226787, + "se": 0.10186799573894179, + "t_stat": -1.3712578617944542, + "p_tukey": 0.515794455791181 + }, + { + "groupA": "GENPD", + "groupB": "PD", + "meanA": 7.479967901234566, + "meanB": 7.730508888888887, + "diff": -0.250540987654321, + "se": 0.09290775497830772, + "t_stat": -2.6966638868070567, + "p_tukey": 0.03602445076323291 + }, + { + "groupA": "GENPD", + "groupB": "PRODROMA", + "meanA": 7.479967901234566, + "meanB": 7.642439999999999, + "diff": -0.16247209876543245, + "se": 0.1419189399455018, + "t_stat": -1.1448232267505891, + "p_tukey": 0.6428047537058592 + }, + { + "groupA": "HC", + "groupB": "PD", + "meanA": 7.619655191256834, + "meanB": 7.730508888888887, + "diff": -0.1108536976320531, + "se": 0.06798912608495727, + "t_stat": -1.6304621638103347, + "p_tukey": 0.36276167118271463 + }, + { + "groupA": "HC", + "groupB": "PRODROMA", + "meanA": 7.619655191256834, + "meanB": 7.642439999999999, + "diff": -0.022784808743164575, + "se": 0.12701045565598884, + "t_stat": -0.17939317377836853, + "p_tukey": 0.9 + }, + { + "groupA": "PD", + "groupB": "PRODROMA", + "meanA": 7.730508888888887, + "meanB": 7.642439999999999, + "diff": 0.08806888888888853, + "se": 0.11994339592150169, + "t_stat": 0.7342537553841331, + "p_tukey": 0.8730988535273132 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "adnicategory" + }, + { + "name": "y", + "value": "leftaorganteriororbitalgyrus" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "ppmi,edsd,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 1063.0, + "df_explained": 2.0, + "ss_residual": 36.126931601434386, + "ss_explained": 0.7532594036596517, + "ms_residual": 0.03398582464857421, + "ms_explained": 0.37662970182982586, + "p_value": 1.7244552772412502e-05, + "f_stat": 11.081964487380075, + 
"tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 1.453428477477478, + "meanB": 1.5146916716417917, + "diff": -0.061263194164313806, + "se": 0.01673027431808154, + "t_stat": -3.6618164770976005, + "p_tukey": 0.001 + }, + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 1.453428477477478, + "meanB": 1.5204590138888878, + "diff": -0.06703053641140988, + "se": 0.0145633947969375, + "t_stat": -4.602672477539754, + "p_tukey": 0.001 + }, + { + "groupA": "CN", + "groupB": "MCI", + "meanA": 1.5146916716417917, + "meanB": 1.5204590138888878, + "diff": -0.0057673422470960745, + "se": 0.013631432292027111, + "t_stat": -0.42309143482078077, + "p_tukey": 0.9 + } + ] + } + }, + { + "input": [ + { + "name": "x", + "value": "edsdcategory" + }, + { + "name": "y", + "value": "_4thventricle" + }, + { + "name": "pathology", + "value": "dementia" + }, + { + "name": "dataset", + "value": "edsd,ppmi,adni" + }, + { + "name": "filter", + "value": "" + } + ], + "output": { + "df_residual": 365.0, + "df_explained": 2.0, + "ss_residual": 88.35745293366638, + "ss_explained": 0.6015694927064578, + "ms_residual": 0.2420752135168942, + "ms_explained": 0.3007847463532289, + "p_value": 0.28987214970860337, + "f_stat": 1.242525998359751, + "tukey_test": [ + { + "groupA": "AD", + "groupB": "CN", + "meanA": 2.0466812765957445, + "meanB": 1.9576682119205289, + "diff": 0.08901306467521564, + "se": 0.05761937690845697, + "t_stat": 1.5448460127681618, + "p_tukey": 0.27167490456882504 + }, + { + "groupA": "AD", + "groupB": "MCI", + "meanA": 2.0466812765957445, + "meanB": 2.02053552631579, + "diff": 0.026145750279954694, + "se": 0.07001461035947633, + "t_stat": 0.3734327756123251, + "p_tukey": 0.9 + }, + { + "groupA": "CN", + "groupB": "MCI", + "meanA": 1.9576682119205289, + "meanB": 2.02053552631579, + "diff": -0.06286731439526094, + "se": 0.06919788499106946, + "t_stat": -0.9085149698343304, + "p_tukey": 0.6243482531816258 + } + ] + } + } + ] +} \ No newline at end of file diff --git a/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/expected/logistic_regression_expected.json b/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/expected/logistic_regression_expected.json index 131a82a766bbd155e1fb3bdd5f2237e9735e91ae..8182080307878c337dd10849e48e28fc81060902 100644 --- a/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/expected/logistic_regression_expected.json +++ b/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/expected/logistic_regression_expected.json @@ -4,11 +4,11 @@ "input": [ { "name": "x", - "value": "leftaorganteriororbitalgyrus" + "value": "rightgregyrusrectus,leftventraldc" }, { "name": "y", - "value": "rs11136000_t" + "value": "rs17125944_c" }, { "name": "pathology", @@ -16,22 +16,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs11136000_t\", \"field\": \"rs11136000_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs11136000_t\", \"field\": \"rs11136000_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 891, + "n_obs": 148, "coeff": [ - 0.9568752843019066, - -0.39341798676952355 + -10.24609712739566, + 1.0378676891181042, + 1.043862206902038 ], "coeff_names": [ "Intercept", - "leftaorganteriororbitalgyrus" + 
"rightgregyrusrectus", + "leftventraldc" ] } }, @@ -39,11 +49,11 @@ "input": [ { "name": "x", - "value": "rightthalamusproper" + "value": "leftpogpostcentralgyrus" }, { "name": "y", - "value": "rs3865444_t" + "value": "rs11767557_c" }, { "name": "pathology", @@ -51,22 +61,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3865444_t\", \"field\": \"rs3865444_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs3865444_t\", \"field\": \"rs3865444_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "2" } ], "output": { - "n_obs": 892, + "n_obs": 295, "coeff": [ - 0.24085347982644031, - -0.06435328636810261 + 0.9109022179496812, + -0.24102751954632437 ], "coeff_names": [ "Intercept", - "rightthalamusproper" + "leftpogpostcentralgyrus" ] } }, @@ -74,11 +92,11 @@ "input": [ { "name": "x", - "value": "leftmfcmedialfrontalcortex" + "value": "leftlorglateralorbitalgyrus,leftsplsuperiorparietallobule,rightprgprecentralgyrus,leftaorganteriororbitalgyrus,minimentalstate,leftaccumbensarea,leftmorgmedialorbitalgyrus,leftmfgmiddlefrontalgyrus,leftsogsuperioroccipitalgyrus" }, { "name": "y", - "value": "rs17125944_c" + "value": "rs610932_a" }, { "name": "pathology", @@ -86,22 +104,46 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 888, + "n_obs": 604, "coeff": [ - -5.937813880660727, - 1.5928252776590544 + -1.887057375748268, + 1.6060102647373724, + -0.25045075254920923, + 0.029668321523642543, + -1.888691366729222, + 0.032922245653337796, + 2.862057882506842, + -0.37417091184187307, + 0.11239914969402853, + -0.17305558062363313 ], "coeff_names": [ "Intercept", - "leftmfcmedialfrontalcortex" + "leftlorglateralorbitalgyrus", + "leftsplsuperiorparietallobule", + "rightprgprecentralgyrus", + "leftaorganteriororbitalgyrus", + "minimentalstate", + "leftaccumbensarea", + "leftmorgmedialorbitalgyrus", + "leftmfgmiddlefrontalgyrus", + "leftsogsuperioroccipitalgyrus" ] } }, @@ -109,11 +151,11 @@ "input": [ { "name": "x", - "value": "leftangangulargyrus" + "value": "rightfugfusiformgyrus,leftpoparietaloperculum" }, { "name": "y", - "value": "adnicategory" + "value": "neurodegenerativescategories" }, { "name": "pathology", @@ -121,22 +163,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"MCI\"}, {\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "PD" + }, + 
{ + "name": "negative_level", + "value": "MCI" } ], "output": { - "n_obs": 1066, + "n_obs": 1057, "coeff": [ - -3.5366510821397656, - 0.5104637945428868 + -7.802842579609495, + 1.2643397912460468, + -0.8637824443833834 ], "coeff_names": [ "Intercept", - "leftangangulargyrus" + "rightfugfusiformgyrus", + "leftpoparietaloperculum" ] } }, @@ -144,11 +196,11 @@ "input": [ { "name": "x", - "value": "rightptplanumtemporale,rightofugoccipitalfusiformgyrus" + "value": "leftmfgmiddlefrontalgyrus,rightptplanumtemporale,leftmogmiddleoccipitalgyrus,subjectageyears,leftthalamusproper" }, { "name": "y", - "value": "adnicategory" + "value": "rs744373_c" }, { "name": "pathology", @@ -156,24 +208,38 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"MCI\"}, {\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "2" } ], "output": { - "n_obs": 1066, + "n_obs": 427, "coeff": [ - -4.351623793569322, - 0.051749542619963014, - 1.2931944125820143 + -0.5036172805575505, + -0.07028222763846272, + 0.4450313713332199, + 0.20421756332367325, + 0.01337342497593909, + -0.36644577665868927 ], "coeff_names": [ "Intercept", + "leftmfgmiddlefrontalgyrus", "rightptplanumtemporale", - "rightofugoccipitalfusiformgyrus" + "leftmogmiddleoccipitalgyrus", + "subjectageyears", + "leftthalamusproper" ] } }, @@ -181,11 +247,11 @@ "input": [ { "name": "x", - "value": "leftofugoccipitalfusiformgyrus" + "value": "leftmogmiddleoccipitalgyrus" }, { "name": "y", - "value": "rs2718058_g" + "value": "rs1476679_c" }, { "name": "pathology", @@ -193,22 +259,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs2718058_g\", \"field\": \"rs2718058_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs2718058_g\", \"field\": \"rs2718058_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 552, + "n_obs": 329, "coeff": [ - -0.00906114173760145, - 0.026230226617702773 + -1.5435694803210853, + -0.04055076389809981 ], "coeff_names": [ "Intercept", - "leftofugoccipitalfusiformgyrus" + "leftmogmiddleoccipitalgyrus" ] } }, @@ -216,11 +290,11 @@ "input": [ { "name": "x", - "value": "subjectageyears,leftmtgmiddletemporalgyrus,leftprgprecentralgyrus,leftaccumbensarea,rightopifgopercularpartoftheinferiorfrontalgyrus,av45" + "value": "rightlateralventricle,rightaorganteriororbitalgyrus" }, { "name": "y", - "value": "rs744373_c" + "value": "agegroup" }, { "name": "pathology", @@ -228,32 +302,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs744373_c\", \"field\": \"rs744373_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs744373_c\", \"field\": \"rs744373_c\", \"type\": \"string\", \"input\": \"text\", 
\"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "60-69y" + }, + { + "name": "negative_level", + "value": "+80y" } ], "output": { - "n_obs": 302, + "n_obs": 164, "coeff": [ - 0.6166475079054518, - 0.011592642963260133, - 0.23712339823999726, - -0.16564913558691735, - -5.088317479953032, - -0.25591283656310804, - -0.5381718374286617 + -7.446227255855247, + -0.09164431036986305, + 6.009780577491134 ], "coeff_names": [ "Intercept", - "subjectageyears", - "leftmtgmiddletemporalgyrus", - "leftprgprecentralgyrus", - "leftaccumbensarea", - "rightopifgopercularpartoftheinferiorfrontalgyrus", - "av45" + "rightlateralventricle", + "rightaorganteriororbitalgyrus" ] } }, @@ -261,11 +335,11 @@ "input": [ { "name": "x", - "value": "leftfofrontaloperculum,rightliglingualgyrus,rightacgganteriorcingulategyrus,rightitginferiortemporalgyrus" + "value": "leftangangulargyrus,minimentalstate" }, { "name": "y", - "value": "gender" + "value": "neurodegenerativescategories" }, { "name": "pathology", @@ -273,28 +347,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"gender\", \"field\": \"gender\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"M\"}, {\"id\": \"gender\", \"field\": \"gender\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"F\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "AD" + }, + { + "name": "negative_level", + "value": "MCI" } ], "output": { - "n_obs": 1066, + "n_obs": 1013, "coeff": [ - -14.207650604771972, - 0.946403653251587, - 0.5349351252832627, - 1.295253257480506, - 0.28798579535481533 + -24.141913728208884, + 0.18315093264229407, + 0.9123662741821833 ], "coeff_names": [ "Intercept", - "leftfofrontaloperculum", - "rightliglingualgyrus", - "rightacgganteriorcingulategyrus", - "rightitginferiortemporalgyrus" + "leftangangulargyrus", + "minimentalstate" ] } }, @@ -302,11 +380,11 @@ "input": [ { "name": "x", - "value": "lefttmptemporalpole,rightsmcsupplementarymotorcortex" + "value": "rightmpogpostcentralgyrusmedialsegment" }, { "name": "y", - "value": "neurodegenerativescategories" + "value": "dataset" }, { "name": "pathology", @@ -314,24 +392,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"neurodegenerativescategories\", \"field\": \"neurodegenerativescategories\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"MCI\"}, {\"id\": \"neurodegenerativescategories\", \"field\": \"neurodegenerativescategories\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "adni" + }, + { + "name": "negative_level", + "value": "edsd" } ], "output": { - "n_obs": 798, + "n_obs": 1503, "coeff": [ - -3.1948701275486266, - 0.8420596495062856, - -0.35284828994809253 + 0.6699545770638934, + -1.4751498701280281 ], "coeff_names": [ "Intercept", - "lefttmptemporalpole", - "rightsmcsupplementarymotorcortex" + "rightmpogpostcentralgyrusmedialsegment" ] } }, @@ -339,11 +423,11 @@ "input": [ { "name": "x", - "value": "lefttrifgtriangularpartoftheinferiorfrontalgyrus,leftsfgsuperiorfrontalgyrus" + "value": "leftmprgprecentralgyrusmedialsegment,rightporgposteriororbitalgyrus" }, { 
"name": "y", - "value": "alzheimerbroadcategory" + "value": "rs190982_g" }, { "name": "pathology", @@ -355,20 +439,28 @@ }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"alzheimerbroadcategory\", \"field\": \"alzheimerbroadcategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}, {\"id\": \"alzheimerbroadcategory\", \"field\": \"alzheimerbroadcategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"CN\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 1066, + "n_obs": 465, "coeff": [ - -2.390051022370145, - -0.13915782100039362, - 0.22028247693721242 + -0.3441575225518922, + -0.12714746578507513, + 0.3503676805258186 ], "coeff_names": [ "Intercept", - "lefttrifgtriangularpartoftheinferiorfrontalgyrus", - "leftsfgsuperiorfrontalgyrus" + "leftmprgprecentralgyrusmedialsegment", + "rightporgposteriororbitalgyrus" ] } }, @@ -376,11 +468,11 @@ "input": [ { "name": "x", - "value": "rightventraldc,leftsmgsupramarginalgyrus,leftpogpostcentralgyrus,fdg" + "value": "leftmogmiddleoccipitalgyrus" }, { "name": "y", - "value": "rs17125944_c" + "value": "rs610932_a" }, { "name": "pathology", @@ -388,28 +480,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "0" + }, + { + "name": "negative_level", + "value": "2" } ], "output": { - "n_obs": 585, + "n_obs": 430, "coeff": [ - -18.591367685732404, - 1.9740925114880148, - -0.6562916446996475, - 0.13759404990968352, - 1.7228633686951755 + 0.031501376682706764, + -0.1181342691900003 ], "coeff_names": [ "Intercept", - "rightventraldc", - "leftsmgsupramarginalgyrus", - "leftpogpostcentralgyrus", - "fdg" + "leftmogmiddleoccipitalgyrus" ] } }, @@ -417,11 +511,11 @@ "input": [ { "name": "x", - "value": "rightptplanumtemporale" + "value": "leftsplsuperiorparietallobule,rightcuncuneus,subjectageyears" }, { "name": "y", - "value": "rs3851179_a" + "value": "parkinsonbroadcategory" }, { "name": "pathology", @@ -429,22 +523,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3851179_a\", \"field\": \"rs3851179_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs3851179_a\", \"field\": \"rs3851179_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "CN" + }, + { + "name": "negative_level", + "value": "Other" } ], "output": { - "n_obs": 892, + "n_obs": 309, "coeff": [ - -1.0642730632845443, - 0.5693390509605262 + -2.513327405458319, + 0.028934633612383225, + -0.12730829090416645, + 0.039087557171190564 ], "coeff_names": [ "Intercept", - "rightptplanumtemporale" + "leftsplsuperiorparietallobule", + "rightcuncuneus", + "subjectageyears" ] } }, @@ -452,11 +558,11 @@ "input": [ { "name": "x", - "value": 
"leftlorglateralorbitalgyrus,leftmtgmiddletemporalgyrus" + "value": "leftioginferioroccipitalgyrus,rightmprgprecentralgyrusmedialsegment,rightcerebellumexterior,leftfofrontaloperculum,leftpcggposteriorcingulategyrus" }, { "name": "y", - "value": "apoe4" + "value": "agegroup" }, { "name": "pathology", @@ -464,24 +570,38 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"apoe4\", \"field\": \"apoe4\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"apoe4\", \"field\": \"apoe4\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "60-69y" + }, + { + "name": "negative_level", + "value": "70-79y" } ], "output": { - "n_obs": 1061, + "n_obs": 717, "coeff": [ - -1.5230435085449143, - 0.37848115730502796, - -0.040258892302938186 + 2.9672126523480085, + -0.17309611453913917, + -0.19731182791188956, + -0.08899133273792842, + 0.2537363284482626, + 0.4663093378323604 ], "coeff_names": [ "Intercept", - "leftlorglateralorbitalgyrus", - "leftmtgmiddletemporalgyrus" + "leftioginferioroccipitalgyrus", + "rightmprgprecentralgyrusmedialsegment", + "rightcerebellumexterior", + "leftfofrontaloperculum", + "leftpcggposteriorcingulategyrus" ] } }, @@ -489,11 +609,11 @@ "input": [ { "name": "x", - "value": "leftventraldc,rightcuncuneus,minimentalstate,leftacgganteriorcingulategyrus,rightioginferioroccipitalgyrus" + "value": "rightpinsposteriorinsula,rightphgparahippocampalgyrus,rightgregyrusrectus" }, { "name": "y", - "value": "rs17125944_c" + "value": "rs3865444_t" }, { "name": "pathology", @@ -501,30 +621,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "2" } ], "output": { - "n_obs": 888, + "n_obs": 444, "coeff": [ - -15.320948956494192, - 0.7502215734184606, - -0.09350640120536051, - 0.09821150298913772, - -0.1719446638876618, - 1.0775113719294511 + -3.8238064261949205, + 0.15883783651736288, + 0.13703594006807454, + 0.6960665694130399 ], "coeff_names": [ "Intercept", - "leftventraldc", - "rightcuncuneus", - "minimentalstate", - "leftacgganteriorcingulategyrus", - "rightioginferioroccipitalgyrus" + "rightpinsposteriorinsula", + "rightphgparahippocampalgyrus", + "rightgregyrusrectus" ] } }, @@ -532,11 +656,11 @@ "input": [ { "name": "x", - "value": "rightsplsuperiorparietallobule" + "value": "subjectage" }, { "name": "y", - "value": "rs1476679_c" + "value": "alzheimerbroadcategory" }, { "name": "pathology", @@ -544,22 +668,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs1476679_c\", \"field\": \"rs1476679_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs1476679_c\", \"field\": \"rs1476679_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": 
\"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "AD" + }, + { + "name": "negative_level", + "value": "Other" } ], "output": { - "n_obs": 535, + "n_obs": 798, "coeff": [ - -0.6521674329795288, - -0.07839933676657071 + 3.8688620985108724, + -0.03945736573531516 ], "coeff_names": [ "Intercept", - "rightsplsuperiorparietallobule" + "subjectage" ] } }, @@ -567,11 +699,11 @@ "input": [ { "name": "x", - "value": "rightfofrontaloperculum" + "value": "leftthalamusproper" }, { "name": "y", - "value": "rs17125944_c" + "value": "rs11767557_c" }, { "name": "pathology", @@ -583,18 +715,26 @@ }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 888, + "n_obs": 295, "coeff": [ - -11.97600669762332, - 3.6413605006974534 + 1.8402732475985693, + -0.5384575828924021 ], "coeff_names": [ "Intercept", - "rightfofrontaloperculum" + "leftthalamusproper" ] } }, @@ -602,11 +742,11 @@ "input": [ { "name": "x", - "value": "rightppplanumpolare,rightioginferioroccipitalgyrus,leftpallidum" + "value": "rightamygdala,lefttrifgtriangularpartoftheinferiorfrontalgyrus,leftstgsuperiortemporalgyrus" }, { "name": "y", - "value": "rs3851179_a" + "value": "rs744373_c" }, { "name": "pathology", @@ -614,26 +754,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3851179_a\", \"field\": \"rs3851179_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs3851179_a\", \"field\": \"rs3851179_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "0" + }, + { + "name": "negative_level", + "value": "2" } ], "output": { - "n_obs": 892, + "n_obs": 506, "coeff": [ - -1.4101951680533995, - 0.16747225354193834, - 0.13585772840513516, - 0.12560555529703837 + 0.8200558870419613, + 0.8421635161357559, + -0.09196028579954638, + -0.37634150207351846 ], "coeff_names": [ "Intercept", - "rightppplanumpolare", - "rightioginferioroccipitalgyrus", - "leftpallidum" + "rightamygdala", + "lefttrifgtriangularpartoftheinferiorfrontalgyrus", + "leftstgsuperiortemporalgyrus" ] } }, @@ -641,11 +789,11 @@ "input": [ { "name": "x", - "value": "rightainsanteriorinsula" + "value": "rightprgprecentralgyrus,leftmtgmiddletemporalgyrus,rightitginferiortemporalgyrus,leftitginferiortemporalgyrus" }, { "name": "y", - "value": "rs10498633_t" + "value": "ppmicategory" }, { "name": "pathology", @@ -653,22 +801,36 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs10498633_t\", \"field\": \"rs10498633_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs10498633_t\", \"field\": \"rs10498633_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + 
"value": "PRODROMA" + }, + { + "name": "negative_level", + "value": "PD" } ], "output": { - "n_obs": 892, + "n_obs": 450, "coeff": [ - -1.529046015917264, - 0.21499604683094814 + 0.0485835254366006, + -0.10542032821896419, + -0.5014159972935222, + 0.6295347615429456, + -0.12080590350584795 ], "coeff_names": [ "Intercept", - "rightainsanteriorinsula" + "rightprgprecentralgyrus", + "leftmtgmiddletemporalgyrus", + "rightitginferiortemporalgyrus", + "leftitginferiortemporalgyrus" ] } }, @@ -676,11 +838,11 @@ "input": [ { "name": "x", - "value": "leftpcuprecuneus,rightcalccalcarinecortex,rightlorglateralorbitalgyrus,rightgregyrusrectus" + "value": "cerebellarvermallobulesviiix" }, { "name": "y", - "value": "gender" + "value": "agegroup" }, { "name": "pathology", @@ -688,28 +850,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"gender\", \"field\": \"gender\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"M\"}, {\"id\": \"gender\", \"field\": \"gender\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"F\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "50-59y" + }, + { + "name": "negative_level", + "value": "-50y" } ], "output": { - "n_obs": 1066, + "n_obs": 103, "coeff": [ - -12.083063083766234, - 0.9664926622707743, - 0.20353862691878596, - 0.9576727429199288, - -0.34673717553822075 + -7.212216994556497, + 2.2734276120998937 ], "coeff_names": [ "Intercept", - "leftpcuprecuneus", - "rightcalccalcarinecortex", - "rightlorglateralorbitalgyrus", - "rightgregyrusrectus" + "cerebellarvermallobulesviiix" ] } }, @@ -717,11 +881,11 @@ "input": [ { "name": "x", - "value": "leftmsfgsuperiorfrontalgyrusmedialsegment,leftpallidum,rightsfgsuperiorfrontalgyrus,rightcocentraloperculum,rightmfcmedialfrontalcortex,leftitginferiortemporalgyrus,rightpogpostcentralgyrus" + "value": "leftcuncuneus,leftstgsuperiortemporalgyrus,leftsfgsuperiorfrontalgyrus,rightthalamusproper" }, { "name": "y", - "value": "alzheimerbroadcategory" + "value": "apoe4" }, { "name": "pathology", @@ -729,34 +893,36 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"alzheimerbroadcategory\", \"field\": \"alzheimerbroadcategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"Other\"}, {\"id\": \"alzheimerbroadcategory\", \"field\": \"alzheimerbroadcategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"CN\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "0" + }, + { + "name": "negative_level", + "value": "2" } ], "output": { - "n_obs": 1066, + "n_obs": 666, "coeff": [ - -0.2548201008151604, - 0.8209117980831723, - 0.7083245320396317, - -0.21493001701665937, - 0.7504454815181234, - -0.8982213544177662, - -0.6222247682767882, - 0.2609361699696955 + -0.4081172025523565, + 0.5029774674437706, + -0.6678537295572301, + 0.1759326370542681, + -0.18173914807415958 ], "coeff_names": [ "Intercept", - "leftmsfgsuperiorfrontalgyrusmedialsegment", - "leftpallidum", - "rightsfgsuperiorfrontalgyrus", - "rightcocentraloperculum", - "rightmfcmedialfrontalcortex", - "leftitginferiortemporalgyrus", - "rightpogpostcentralgyrus" + "leftcuncuneus", + "leftstgsuperiortemporalgyrus", + "leftsfgsuperiorfrontalgyrus", + "rightthalamusproper" ] } }, @@ -764,11 
+930,11 @@ "input": [ { "name": "x", - "value": "rightpcggposteriorcingulategyrus,leftainsanteriorinsula" + "value": "leftphgparahippocampalgyrus,rightpallidum,leftsmcsupplementarymotorcortex,leftcalccalcarinecortex" }, { "name": "y", - "value": "rs1476679_c" + "value": "gender" }, { "name": "pathology", @@ -776,24 +942,36 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs1476679_c\", \"field\": \"rs1476679_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs1476679_c\", \"field\": \"rs1476679_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "M" + }, + { + "name": "negative_level", + "value": "F" } ], "output": { - "n_obs": 535, + "n_obs": 714, "coeff": [ - -1.4521048412794717, - 0.6180169259582396, - -0.6670192729958567 + -11.411679172405371, + 2.9369638034258805, + 1.6743012931398147, + 0.06160922762646168, + -0.06517023680095362 ], "coeff_names": [ "Intercept", - "rightpcggposteriorcingulategyrus", - "leftainsanteriorinsula" + "leftphgparahippocampalgyrus", + "rightpallidum", + "leftsmcsupplementarymotorcortex", + "leftcalccalcarinecortex" ] } }, @@ -801,11 +979,11 @@ "input": [ { "name": "x", - "value": "rightppplanumpolare,leftcuncuneus,rightpoparietaloperculum,leftliglingualgyrus,rightlorglateralorbitalgyrus" + "value": "rightitginferiortemporalgyrus,leftfugfusiformgyrus,rightocpoccipitalpole" }, { "name": "y", - "value": "rs11767557_c" + "value": "rs610932_a" }, { "name": "pathology", @@ -813,30 +991,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs11767557_c\", \"field\": \"rs11767557_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs11767557_c\", \"field\": \"rs11767557_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "2" } ], "output": { - "n_obs": 885, + "n_obs": 604, "coeff": [ - -4.7118311563472295, - 0.19108576317185655, - -0.8350516548392282, - -0.5040727542739285, - 0.6688580719276176, - 0.6646988595689725 + -0.3263317786314602, + -0.01744524936099609, + -0.14119883328598434, + 0.11561127153512543 ], "coeff_names": [ "Intercept", - "rightppplanumpolare", - "leftcuncuneus", - "rightpoparietaloperculum", - "leftliglingualgyrus", - "rightlorglateralorbitalgyrus" + "rightitginferiortemporalgyrus", + "leftfugfusiformgyrus", + "rightocpoccipitalpole" ] } }, @@ -844,11 +1026,11 @@ "input": [ { "name": "x", - "value": "rightsfgsuperiorfrontalgyrus" + "value": "lefthippocampus" }, { "name": "y", - "value": "rs190982_g" + "value": "alzheimerbroadcategory" }, { "name": "pathology", @@ -860,18 +1042,26 @@ }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs190982_g\", \"field\": \"rs190982_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs190982_g\", \"field\": \"rs190982_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "AD" + }, + { + "name": "negative_level", + "value": "Other" 
} ], "output": { - "n_obs": 536, + "n_obs": 798, "coeff": [ - 0.40315724949801723, - -0.0191892516717394 + -4.653489339984502, + 1.9772815995159987 ], "coeff_names": [ "Intercept", - "rightsfgsuperiorfrontalgyrus" + "lefthippocampus" ] } }, @@ -879,11 +1069,11 @@ "input": [ { "name": "x", - "value": "righttrifgtriangularpartoftheinferiorfrontalgyrus,leftacgganteriorcingulategyrus" + "value": "_4thventricle,rightmprgprecentralgyrusmedialsegment,rightmsfgsuperiorfrontalgyrusmedialsegment,righthippocampus,rightpallidum,rightcuncuneus,leftventraldc,rightfofrontaloperculum,leftmprgprecentralgyrusmedialsegment,leftorifgorbitalpartoftheinferiorfrontalgyrus,rightpcuprecuneus" }, { "name": "y", - "value": "rs744373_c" + "value": "parkinsonbroadcategory" }, { "name": "pathology", @@ -891,24 +1081,50 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs744373_c\", \"field\": \"rs744373_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs744373_c\", \"field\": \"rs744373_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "PD" + }, + { + "name": "negative_level", + "value": "CN" } ], "output": { - "n_obs": 838, + "n_obs": 588, "coeff": [ - 0.29293524455929626, - 0.06927828338193956, - -0.3956174642076495 + -1.7478921172966868, + 0.04192783356412181, + -0.03618188655872278, + -0.028242651163004336, + -0.3452733775203597, + -0.3858839018830991, + -0.19011831038483798, + 0.33723739471760095, + -0.4606183439730656, + -0.1747179545646882, + 0.47972622736704396, + 0.408962912747866 ], "coeff_names": [ "Intercept", - "righttrifgtriangularpartoftheinferiorfrontalgyrus", - "leftacgganteriorcingulategyrus" + "_4thventricle", + "rightmprgprecentralgyrusmedialsegment", + "rightmsfgsuperiorfrontalgyrusmedialsegment", + "righthippocampus", + "rightpallidum", + "rightcuncuneus", + "leftventraldc", + "rightfofrontaloperculum", + "leftmprgprecentralgyrusmedialsegment", + "leftorifgorbitalpartoftheinferiorfrontalgyrus", + "rightpcuprecuneus" ] } }, @@ -916,11 +1132,11 @@ "input": [ { "name": "x", - "value": "leftofugoccipitalfusiformgyrus" + "value": "leftpallidum" }, { "name": "y", - "value": "neurodegenerativescategories" + "value": "rs11767557_c" }, { "name": "pathology", @@ -932,18 +1148,26 @@ }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"neurodegenerativescategories\", \"field\": \"neurodegenerativescategories\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"MCI\"}, {\"id\": \"neurodegenerativescategories\", \"field\": \"neurodegenerativescategories\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "0" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 798, + "n_obs": 845, "coeff": [ - -4.1638592890444235, - 1.2711333655613788 + -1.096393779925412, + 0.17039381623629657 ], "coeff_names": [ "Intercept", - "leftofugoccipitalfusiformgyrus" + "leftpallidum" ] } }, @@ -951,11 +1175,11 @@ "input": [ { "name": "x", - "value": "leftscasubcallosalarea,leftsplsuperiorparietallobule" + "value": "leftsplsuperiorparietallobule" }, { "name": "y", - "value": "agegroup" + "value": "alzheimerbroadcategory" }, { "name": "pathology", @@ -963,23 
+1187,29 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"agegroup\", \"field\": \"agegroup\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"70-79y\"}, {\"id\": \"agegroup\", \"field\": \"agegroup\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"+80y\"}], \"valid\": true}" - } + "value": "" + }, + { + "name": "positive_level", + "value": "Other" + }, + { + "name": "negative_level", + "value": "CN" + } ], "output": { - "n_obs": 1066, + "n_obs": 941, "coeff": [ - 0.43136932463596106, - -1.8060453587312115, - 0.25873552353285584 + -1.211699994187357, + 0.2699329246657337 ], "coeff_names": [ "Intercept", - "leftscasubcallosalarea", "leftsplsuperiorparietallobule" ] } @@ -988,11 +1218,11 @@ "input": [ { "name": "x", - "value": "rightttgtransversetemporalgyrus,rightopifgopercularpartoftheinferiorfrontalgyrus,leftgregyrusrectus,rightmprgprecentralgyrusmedialsegment,rightfugfusiformgyrus,rightsogsuperioroccipitalgyrus,leftamygdala" + "value": "rightphgparahippocampalgyrus,rightmprgprecentralgyrusmedialsegment" }, { "name": "y", - "value": "rs610932_a" + "value": "edsdcategory" }, { "name": "pathology", @@ -1000,34 +1230,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "MCI" + }, + { + "name": "negative_level", + "value": "AD" } ], "output": { - "n_obs": 889, + "n_obs": 217, "coeff": [ - -0.6656703717320157, - -1.2308737998830845, - 0.2986002127899796, - -5.142808297051767e-05, - 0.5511295954025995, - 0.0039017872152385465, - -0.09259311513553806, - -0.8548725099912434 + -3.502101644175029, + 1.8950775241886877, + -0.9403966268537737 ], "coeff_names": [ "Intercept", - "rightttgtransversetemporalgyrus", - "rightopifgopercularpartoftheinferiorfrontalgyrus", - "leftgregyrusrectus", - "rightmprgprecentralgyrusmedialsegment", - "rightfugfusiformgyrus", - "rightsogsuperioroccipitalgyrus", - "leftamygdala" + "rightphgparahippocampalgyrus", + "rightmprgprecentralgyrusmedialsegment" ] } }, @@ -1035,11 +1263,11 @@ "input": [ { "name": "x", - "value": "rightcuncuneus" + "value": "leftorifgorbitalpartoftheinferiorfrontalgyrus" }, { "name": "y", - "value": "rs610932_a" + "value": "rs2718058_g" }, { "name": "pathology", @@ -1047,22 +1275,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 889, + "n_obs": 477, "coeff": [ - -0.6210838898133173, - -0.11322958111167308 + -1.6630899063903704, + 1.189578929872913 ], "coeff_names": [ "Intercept", - "rightcuncuneus" + 
"leftorifgorbitalpartoftheinferiorfrontalgyrus" ] } }, @@ -1070,11 +1306,11 @@ "input": [ { "name": "x", - "value": "leftofugoccipitalfusiformgyrus,leftmorgmedialorbitalgyrus,leftopifgopercularpartoftheinferiorfrontalgyrus,rightsmgsupramarginalgyrus,rightaccumbensarea" + "value": "rightitginferiortemporalgyrus" }, { "name": "y", - "value": "rs10498633_t" + "value": "rs17125944_c" }, { "name": "pathology", @@ -1082,30 +1318,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs10498633_t\", \"field\": \"rs10498633_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs10498633_t\", \"field\": \"rs10498633_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "2" } ], "output": { - "n_obs": 892, + "n_obs": 148, "coeff": [ - -1.5158269479674413, - 0.1861132144868296, - 0.7466690056772864, - -0.870731089469438, - -0.257670754636077, - 2.1733118639662514 + -8.124940605918136, + 0.4567379979769731 ], "coeff_names": [ "Intercept", - "leftofugoccipitalfusiformgyrus", - "leftmorgmedialorbitalgyrus", - "leftopifgopercularpartoftheinferiorfrontalgyrus", - "rightsmgsupramarginalgyrus", - "rightaccumbensarea" + "rightitginferiortemporalgyrus" ] } }, @@ -1113,11 +1349,11 @@ "input": [ { "name": "x", - "value": "leftphgparahippocampalgyrus" + "value": "rightgregyrusrectus" }, { "name": "y", - "value": "neurodegenerativescategories" + "value": "rs3764650_g" }, { "name": "pathology", @@ -1125,22 +1361,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"neurodegenerativescategories\", \"field\": \"neurodegenerativescategories\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"MCI\"}, {\"id\": \"neurodegenerativescategories\", \"field\": \"neurodegenerativescategories\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "0" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 798, + "n_obs": 870, "coeff": [ - -4.914838715312238, - 1.9812623423602511 + -0.060868845680195356, + -0.7347980685001815 ], "coeff_names": [ "Intercept", - "leftphgparahippocampalgyrus" + "rightgregyrusrectus" ] } }, @@ -1148,11 +1392,11 @@ "input": [ { "name": "x", - "value": "leftmtgmiddletemporalgyrus,lefttmptemporalpole" + "value": "av45" }, { "name": "y", - "value": "rs190982_g" + "value": "adnicategory" }, { "name": "pathology", @@ -1160,24 +1404,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs190982_g\", \"field\": \"rs190982_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs190982_g\", \"field\": \"rs190982_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "MCI" + }, + { + "name": "negative_level", + "value": "CN" } ], "output": { - "n_obs": 536, + "n_obs": 378, "coeff": [ - -1.2609601431581796, - -0.13583619559593874, - 0.24521019937575272 + 
-1.2828363094552206, + 1.9715113193105747 ], "coeff_names": [ "Intercept", - "leftmtgmiddletemporalgyrus", - "lefttmptemporalpole" + "av45" ] } }, @@ -1185,7 +1435,7 @@ "input": [ { "name": "x", - "value": "leftpogpostcentralgyrus" + "value": "rightacgganteriorcingulategyrus,lefttmptemporalpole" }, { "name": "y", @@ -1197,22 +1447,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3818361_t\", \"field\": \"rs3818361_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs3818361_t\", \"field\": \"rs3818361_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 892, + "n_obs": 317, "coeff": [ - -2.945425916697887, - 0.015107620156917596 + -0.5913475099691421, + 0.47055537562393496, + -0.46840102161868097 ], "coeff_names": [ "Intercept", - "leftpogpostcentralgyrus" + "rightacgganteriorcingulategyrus", + "lefttmptemporalpole" ] } }, @@ -1220,11 +1480,11 @@ "input": [ { "name": "x", - "value": "rightaccumbensarea" + "value": "leftpinsposteriorinsula,leftopifgopercularpartoftheinferiorfrontalgyrus,rightprgprecentralgyrus,leftmcggmiddlecingulategyrus,rightppplanumpolare" }, { "name": "y", - "value": "rs190982_g" + "value": "rs610932_a" }, { "name": "pathology", @@ -1236,18 +1496,34 @@ }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs190982_g\", \"field\": \"rs190982_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs190982_g\", \"field\": \"rs190982_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 536, + "n_obs": 430, "coeff": [ - -1.386202381062999, - 0.3667404558574915 + -0.1865343493022558, + -1.0530985501257457, + -0.2607839146217663, + 0.0897169839746564, + -0.01824842976025091, + 0.8650849362269832 ], "coeff_names": [ "Intercept", - "rightaccumbensarea" + "leftpinsposteriorinsula", + "leftopifgopercularpartoftheinferiorfrontalgyrus", + "rightprgprecentralgyrus", + "leftmcggmiddlecingulategyrus", + "rightppplanumpolare" ] } }, @@ -1255,11 +1531,11 @@ "input": [ { "name": "x", - "value": "rightsplsuperiorparietallobule,subjectageyears,leftfofrontaloperculum,rightainsanteriorinsula" + "value": "leftopifgopercularpartoftheinferiorfrontalgyrus,rightaorganteriororbitalgyrus,leftmprgprecentralgyrusmedialsegment,rightpinsposteriorinsula" }, { "name": "y", - "value": "rs11136000_t" + "value": "rs11767557_c" }, { "name": "pathology", @@ -1271,24 +1547,32 @@ }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs11136000_t\", \"field\": \"rs11136000_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs11136000_t\", \"field\": \"rs11136000_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "0" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 891, + "n_obs": 845, "coeff": [ - 0.38762147061961055, - -0.11874756852769433, - 
-0.01699793272240049, - -0.12209310430341644, - 0.2585140287671877 + -1.3709682142801334, + -0.03683084969571315, + 0.11999815671376629, + 0.3291397161899135, + -0.17824800521702552 ], "coeff_names": [ "Intercept", - "rightsplsuperiorparietallobule", - "subjectageyears", - "leftfofrontaloperculum", - "rightainsanteriorinsula" + "leftopifgopercularpartoftheinferiorfrontalgyrus", + "rightaorganteriororbitalgyrus", + "leftmprgprecentralgyrusmedialsegment", + "rightpinsposteriorinsula" ] } }, @@ -1296,11 +1580,11 @@ "input": [ { "name": "x", - "value": "leftainsanteriorinsula,leftphgparahippocampalgyrus" + "value": "rightcerebellumwhitematter,rightporgposteriororbitalgyrus,cerebellarvermallobulesviiix" }, { "name": "y", - "value": "rs190982_g" + "value": "agegroup" }, { "name": "pathology", @@ -1308,24 +1592,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs190982_g\", \"field\": \"rs190982_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs190982_g\", \"field\": \"rs190982_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "+80y" + }, + { + "name": "negative_level", + "value": "60-69y" } ], "output": { - "n_obs": 536, + "n_obs": 164, "coeff": [ - -0.46007813906270406, - -0.935240779056278, - 1.062605707404301 + -6.653509005239804, + 0.46516605033706754, + 1.286864075117247, + -0.6967677334714831 ], "coeff_names": [ "Intercept", - "leftainsanteriorinsula", - "leftphgparahippocampalgyrus" + "rightcerebellumwhitematter", + "rightporgposteriororbitalgyrus", + "cerebellarvermallobulesviiix" ] } }, @@ -1333,11 +1627,11 @@ "input": [ { "name": "x", - "value": "leftioginferioroccipitalgyrus" + "value": "rightfugfusiformgyrus,leftpcggposteriorcingulategyrus,leftsplsuperiorparietallobule,leftopifgopercularpartoftheinferiorfrontalgyrus" }, { "name": "y", - "value": "neurodegenerativescategories" + "value": "dataset" }, { "name": "pathology", @@ -1345,22 +1639,36 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"neurodegenerativescategories\", \"field\": \"neurodegenerativescategories\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"MCI\"}, {\"id\": \"neurodegenerativescategories\", \"field\": \"neurodegenerativescategories\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "edsd" + }, + { + "name": "negative_level", + "value": "adni" } ], "output": { - "n_obs": 798, + "n_obs": 1503, "coeff": [ - -3.829090028605075, - 0.794315859120346 + 0.591022303675533, + 0.3932015458276881, + -0.48199819600123994, + -0.28385865854661035, + 0.22151222097684184 ], "coeff_names": [ "Intercept", - "leftioginferioroccipitalgyrus" + "rightfugfusiformgyrus", + "leftpcggposteriorcingulategyrus", + "leftsplsuperiorparietallobule", + "leftopifgopercularpartoftheinferiorfrontalgyrus" ] } }, @@ -1368,11 +1676,11 @@ "input": [ { "name": "x", - "value": "rightfrpfrontalpole" + "value": "montrealcognitiveassessment" }, { "name": "y", - "value": "rs17125944_c" + "value": "rs1476679_c" }, { "name": "pathology", @@ -1380,22 +1688,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd,adni" }, 
{ "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "0" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 888, + "n_obs": 297, "coeff": [ - -2.1968349806183753, - 0.14194383887609413 + -0.04276777012626225, + -0.012962905316583607 ], "coeff_names": [ "Intercept", - "rightfrpfrontalpole" + "montrealcognitiveassessment" ] } }, @@ -1403,11 +1719,11 @@ "input": [ { "name": "x", - "value": "rightppplanumpolare" + "value": "leftpoparietaloperculum" }, { "name": "y", - "value": "rs3865444_t" + "value": "edsdcategory" }, { "name": "pathology", @@ -1415,22 +1731,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3865444_t\", \"field\": \"rs3865444_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs3865444_t\", \"field\": \"rs3865444_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "AD" + }, + { + "name": "negative_level", + "value": "CN" } ], "output": { - "n_obs": 892, + "n_obs": 292, "coeff": [ - -2.7994124023092097, - 0.5540363618015265 + -1.1916856241386793, + 0.546773539841545 ], "coeff_names": [ "Intercept", - "rightppplanumpolare" + "leftpoparietaloperculum" ] } }, @@ -1438,11 +1762,11 @@ "input": [ { "name": "x", - "value": "leftthalamusproper,rightscasubcallosalarea,rightofugoccipitalfusiformgyrus,rightmogmiddleoccipitalgyrus,rightfugfusiformgyrus,leftmogmiddleoccipitalgyrus" + "value": "rightmtgmiddletemporalgyrus,leftpcggposteriorcingulategyrus" }, { "name": "y", - "value": "rs10498633_t" + "value": "alzheimerbroadcategory" }, { "name": "pathology", @@ -1450,32 +1774,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs10498633_t\", \"field\": \"rs10498633_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs10498633_t\", \"field\": \"rs10498633_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "AD" + }, + { + "name": "negative_level", + "value": "Other" } ], "output": { - "n_obs": 892, + "n_obs": 1729, "coeff": [ - -4.063810879188127, - 0.440806317545449, - 0.4618178747522415, - -1.0141067385953326, - 0.9078985885045029, - -0.3297104258636581, - 0.06283189023492099 + -5.673416371094314, + 0.6837627020954553, + -0.48606660032946364 ], "coeff_names": [ "Intercept", - "leftthalamusproper", - "rightscasubcallosalarea", - "rightofugoccipitalfusiformgyrus", - "rightmogmiddleoccipitalgyrus", - "rightfugfusiformgyrus", - "leftmogmiddleoccipitalgyrus" + "rightmtgmiddletemporalgyrus", + "leftpcggposteriorcingulategyrus" ] } }, @@ -1483,7 +1807,7 @@ "input": [ { "name": "x", - "value": "leftpogpostcentralgyrus" + "value": "rightthalamusproper,rightmsfgsuperiorfrontalgyrusmedialsegment,rightlorglateralorbitalgyrus" }, { "name": "y", @@ 
-1495,22 +1819,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs2718058_g\", \"field\": \"rs2718058_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs2718058_g\", \"field\": \"rs2718058_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "0" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 552, + "n_obs": 477, "coeff": [ - 0.43220762573665344, - -0.13749801589795094 + -0.916864054770386, + -0.139610599235787, + -0.039217958620721145, + 1.014262393232165 ], "coeff_names": [ "Intercept", - "leftpogpostcentralgyrus" + "rightthalamusproper", + "rightmsfgsuperiorfrontalgyrusmedialsegment", + "rightlorglateralorbitalgyrus" ] } }, @@ -1518,11 +1854,11 @@ "input": [ { "name": "x", - "value": "leftlorglateralorbitalgyrus,leftliglingualgyrus,leftphgparahippocampalgyrus,minimentalstate" + "value": "rightfugfusiformgyrus,rightfrpfrontalpole" }, { "name": "y", - "value": "adnicategory" + "value": "dataset" }, { "name": "pathology", @@ -1530,28 +1866,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"CN\"}, {\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"MCI\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "ppmi" + }, + { + "name": "negative_level", + "value": "adni" } ], "output": { - "n_obs": 1066, + "n_obs": 1780, "coeff": [ - 17.65621921151174, - 0.09774046827212685, - 0.9123215778364032, - -1.9006296662757778, - -0.6328608648825176 + -6.631735367092149, + 1.5273538357566243, + -1.3230042049235584 ], "coeff_names": [ "Intercept", - "leftlorglateralorbitalgyrus", - "leftliglingualgyrus", - "leftphgparahippocampalgyrus", - "minimentalstate" + "rightfugfusiformgyrus", + "rightfrpfrontalpole" ] } }, @@ -1559,11 +1899,11 @@ "input": [ { "name": "x", - "value": "leftpogpostcentralgyrus" + "value": "leftocpoccipitalpole,righttrifgtriangularpartoftheinferiorfrontalgyrus" }, { "name": "y", - "value": "rs2718058_g" + "value": "dataset" }, { "name": "pathology", @@ -1571,22 +1911,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs2718058_g\", \"field\": \"rs2718058_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs2718058_g\", \"field\": \"rs2718058_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "adni" + }, + { + "name": "negative_level", + "value": "edsd" } ], "output": { - "n_obs": 552, + "n_obs": 1503, "coeff": [ - 0.43220762573665344, - -0.13749801589795094 + 0.5111535121327709, + -0.21584819073785041, + -0.19683508190412996 ], "coeff_names": [ "Intercept", - "leftpogpostcentralgyrus" + "leftocpoccipitalpole", + "righttrifgtriangularpartoftheinferiorfrontalgyrus" ] } }, @@ -1594,11 +1944,11 @@ "input": [ { "name": "x", - "value": "leftlorglateralorbitalgyrus" + 
"value": "rightinflatvent,rightlateralventricle,leftmsfgsuperiorfrontalgyrusmedialsegment,rightpinsposteriorinsula" }, { "name": "y", - "value": "rs2718058_g" + "value": "parkinsonbroadcategory" }, { "name": "pathology", @@ -1606,22 +1956,36 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs2718058_g\", \"field\": \"rs2718058_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs2718058_g\", \"field\": \"rs2718058_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "PD" + }, + { + "name": "negative_level", + "value": "CN" } ], "output": { - "n_obs": 552, + "n_obs": 588, "coeff": [ - -3.2859396167657335, - 0.933918450825627 + -2.038089550236314, + 0.8676642742938832, + -0.003733664665680176, + 0.06020754692049442, + 0.8234775882563731 ], "coeff_names": [ "Intercept", - "leftlorglateralorbitalgyrus" + "rightinflatvent", + "rightlateralventricle", + "leftmsfgsuperiorfrontalgyrusmedialsegment", + "rightpinsposteriorinsula" ] } }, @@ -1629,11 +1993,11 @@ "input": [ { "name": "x", - "value": "rightstgsuperiortemporalgyrus,leftppplanumpolare" + "value": "leftpallidum" }, { "name": "y", - "value": "agegroup" + "value": "rs2718058_g" }, { "name": "pathology", @@ -1641,24 +2005,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,ppmi,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"agegroup\", \"field\": \"agegroup\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"50-59y\"}, {\"id\": \"agegroup\", \"field\": \"agegroup\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"+80y\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 1066, + "n_obs": 302, "coeff": [ - -1.4062790798142242, - 0.6765211682875053, - -2.1854574979261727 + -0.6713648697396178, + -0.2896272322795972 ], "coeff_names": [ "Intercept", - "rightstgsuperiortemporalgyrus", - "leftppplanumpolare" + "leftpallidum" ] } }, @@ -1666,11 +2036,11 @@ "input": [ { "name": "x", - "value": "leftsogsuperioroccipitalgyrus,minimentalstate" + "value": "leftpcggposteriorcingulategyrus" }, { "name": "y", - "value": "rs3764650_g" + "value": "rs744373_c" }, { "name": "pathology", @@ -1678,24 +2048,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3764650_g\", \"field\": \"rs3764650_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs3764650_g\", \"field\": \"rs3764650_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "0" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 892, + "n_obs": 743, "coeff": [ - -10.372481543413091, - 0.6649710802096618, - 0.21830931121593874 + 0.6745929930003557, + -0.20456533186940856 ], "coeff_names": [ "Intercept", - "leftsogsuperioroccipitalgyrus", - "minimentalstate" + "leftpcggposteriorcingulategyrus" ] } }, @@ -1703,11 +2079,11 @@ "input": [ { "name": "x", - "value": "leftsmgsupramarginalgyrus" + "value": 
"minimentalstate" }, { "name": "y", - "value": "apoe4" + "value": "edsdcategory" }, { "name": "pathology", @@ -1715,22 +2091,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"apoe4\", \"field\": \"apoe4\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"apoe4\", \"field\": \"apoe4\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "CN" + }, + { + "name": "negative_level", + "value": "MCI" } ], "output": { - "n_obs": 1061, + "n_obs": 218, "coeff": [ - 0.354210499140561, - -0.2112398909585319 + 24.193802220767523, + -0.8961191545644284 ], "coeff_names": [ "Intercept", - "leftsmgsupramarginalgyrus" + "minimentalstate" ] } }, @@ -1738,11 +2122,11 @@ "input": [ { "name": "x", - "value": "leftacgganteriorcingulategyrus,leftliglingualgyrus" + "value": "rightmorgmedialorbitalgyrus,rightioginferioroccipitalgyrus,rightpogpostcentralgyrus" }, { "name": "y", - "value": "rs2718058_g" + "value": "alzheimerbroadcategory" }, { "name": "pathology", @@ -1750,24 +2134,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs2718058_g\", \"field\": \"rs2718058_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs2718058_g\", \"field\": \"rs2718058_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "AD" + }, + { + "name": "negative_level", + "value": "Other" } ], "output": { - "n_obs": 552, + "n_obs": 1015, "coeff": [ - -2.101009041781112, - 0.6492506997780333, - -0.27377284964464516 + -4.2483770858289285, + -0.18443717341829732, + 0.7182710169128176, + 0.09678152495286418 ], "coeff_names": [ "Intercept", - "leftacgganteriorcingulategyrus", - "leftliglingualgyrus" + "rightmorgmedialorbitalgyrus", + "rightioginferioroccipitalgyrus", + "rightpogpostcentralgyrus" ] } }, @@ -1775,11 +2169,11 @@ "input": [ { "name": "x", - "value": "rightmsfgsuperiorfrontalgyrusmedialsegment,leftaorganteriororbitalgyrus" + "value": "righttrifgtriangularpartoftheinferiorfrontalgyrus,rightaorganteriororbitalgyrus" }, { "name": "y", - "value": "rs3851179_a" + "value": "rs3818361_t" }, { "name": "pathology", @@ -1787,24 +2181,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3851179_a\", \"field\": \"rs3851179_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs3851179_a\", \"field\": \"rs3851179_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 892, + "n_obs": 856, "coeff": [ - -0.44645260667415965, - -0.26867527271029823, - 0.824837304405069 + -1.4060204130456497, + 0.06703236634461789, + 0.2648154438870048 ], "coeff_names": [ "Intercept", - "rightmsfgsuperiorfrontalgyrusmedialsegment", - "leftaorganteriororbitalgyrus" + "righttrifgtriangularpartoftheinferiorfrontalgyrus", + "rightaorganteriororbitalgyrus" ] } }, @@ -1812,11 +2214,11 @@ 
"input": [ { "name": "x", - "value": "rightpogpostcentralgyrus" + "value": "rightscasubcallosalarea" }, { "name": "y", - "value": "adnicategory" + "value": "rs17125944_c" }, { "name": "pathology", @@ -1824,22 +2226,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}, {\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"CN\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "0" + }, + { + "name": "negative_level", + "value": "2" } ], "output": { - "n_obs": 1066, + "n_obs": 747, "coeff": [ - -1.994727652243887, - 0.21743112385817145 + -11.01099070159633, + 5.1996488480233305 ], "coeff_names": [ "Intercept", - "rightpogpostcentralgyrus" + "rightscasubcallosalarea" ] } }, @@ -1847,11 +2257,11 @@ "input": [ { "name": "x", - "value": "leftmfgmiddlefrontalgyrus" + "value": "leftcuncuneus,rightmcggmiddlecingulategyrus,_4thventricle" }, { "name": "y", - "value": "agegroup" + "value": "edsdcategory" }, { "name": "pathology", @@ -1859,22 +2269,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"agegroup\", \"field\": \"agegroup\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"70-79y\"}, {\"id\": \"agegroup\", \"field\": \"agegroup\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"+80y\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "AD" + }, + { + "name": "negative_level", + "value": "MCI" } ], "output": { - "n_obs": 1066, + "n_obs": 217, "coeff": [ - -2.0329837667108053, - 0.16223022913684 + -3.048530388522974, + 0.4376776493416783, + 0.20385524157350393, + -0.15757559738915994 ], "coeff_names": [ "Intercept", - "leftmfgmiddlefrontalgyrus" + "leftcuncuneus", + "rightmcggmiddlecingulategyrus", + "_4thventricle" ] } }, @@ -1882,11 +2304,11 @@ "input": [ { "name": "x", - "value": "rightpoparietaloperculum" + "value": "rightpcggposteriorcingulategyrus,leftporgposteriororbitalgyrus,rightangangulargyrus" }, { "name": "y", - "value": "rs190982_g" + "value": "rs744373_c" }, { "name": "pathology", @@ -1894,22 +2316,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs190982_g\", \"field\": \"rs190982_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs190982_g\", \"field\": \"rs190982_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "0" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 536, + "n_obs": 743, "coeff": [ - -0.9415789315014987, - -0.13949015941387574 + 1.399721625968006, + 0.07626516023610486, + -0.465944739307175, + -0.07784347867236269 ], "coeff_names": [ "Intercept", - "rightpoparietaloperculum" + "rightpcggposteriorcingulategyrus", + "leftporgposteriororbitalgyrus", + "rightangangulargyrus" ] } }, @@ -1917,11 +2351,11 @@ "input": [ { "name": "x", - "value": "lefttmptemporalpole,rightliglingualgyrus" + "value": 
"leftmorgmedialorbitalgyrus,leftmogmiddleoccipitalgyrus" }, { "name": "y", - "value": "agegroup" + "value": "rs610932_a" }, { "name": "pathology", @@ -1929,24 +2363,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"agegroup\", \"field\": \"agegroup\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"60-69y\"}, {\"id\": \"agegroup\", \"field\": \"agegroup\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"+80y\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "2" } ], "output": { - "n_obs": 1066, + "n_obs": 604, "coeff": [ - -5.56509142845987, - 0.4511967101790501, - 0.3093073993125139 + -0.6062892279554329, + 0.05617715537125425, + -0.12746287432919234 ], "coeff_names": [ "Intercept", - "lefttmptemporalpole", - "rightliglingualgyrus" + "leftmorgmedialorbitalgyrus", + "leftmogmiddleoccipitalgyrus" ] } }, @@ -1954,11 +2396,11 @@ "input": [ { "name": "x", - "value": "rightscasubcallosalarea,rightphgparahippocampalgyrus" + "value": "rightmpogpostcentralgyrusmedialsegment,rightmsfgsuperiorfrontalgyrusmedialsegment,_4thventricle,rightmprgprecentralgyrusmedialsegment,rightitginferiortemporalgyrus,rightporgposteriororbitalgyrus,leftpcuprecuneus,leftventraldc,csfglobal" }, { "name": "y", - "value": "rs17125944_c" + "value": "gender" }, { "name": "pathology", @@ -1966,24 +2408,46 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "M" + }, + { + "name": "negative_level", + "value": "F" } ], "output": { - "n_obs": 888, + "n_obs": 714, "coeff": [ - -1.9772198398658152, - 2.008088185747286, - -0.7308806541402474 + -10.978403040135834, + -0.07097420265770006, + 0.3178141054043076, + 0.43249740507717044, + -0.1333704294256197, + 0.41090074523907777, + -0.7336691258755091, + -0.036375590455567715, + 1.0394164380745456, + 0.787184245840706 ], "coeff_names": [ "Intercept", - "rightscasubcallosalarea", - "rightphgparahippocampalgyrus" + "rightmpogpostcentralgyrusmedialsegment", + "rightmsfgsuperiorfrontalgyrusmedialsegment", + "_4thventricle", + "rightmprgprecentralgyrusmedialsegment", + "rightitginferiortemporalgyrus", + "rightporgposteriororbitalgyrus", + "leftpcuprecuneus", + "leftventraldc", + "csfglobal" ] } }, @@ -1991,11 +2455,11 @@ "input": [ { "name": "x", - "value": "lefthippocampus,leftacgganteriorcingulategyrus,leftmtgmiddletemporalgyrus,leftprgprecentralgyrus,rightcaudate,fdg,rightliglingualgyrus" + "value": "rightmcggmiddlecingulategyrus,leftsogsuperioroccipitalgyrus" }, { "name": "y", - "value": "rs3818361_t" + "value": "rs3865444_t" }, { "name": "pathology", @@ -2003,34 +2467,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3818361_t\", \"field\": \"rs3818361_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs3818361_t\", \"field\": 
\"rs3818361_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 589, + "n_obs": 444, "coeff": [ - -3.574605527308249, - 0.5704846102003782, - -0.2750763430520032, - -0.02632999421763776, - -0.06356066562469388, - 0.09781268594529696, - -0.08879353505636042, - 0.23010116474734174 + -3.4012523946007804, + 0.46900496488838905, + -0.10873239767951245 ], "coeff_names": [ "Intercept", - "lefthippocampus", - "leftacgganteriorcingulategyrus", - "leftmtgmiddletemporalgyrus", - "leftprgprecentralgyrus", - "rightcaudate", - "fdg", - "rightliglingualgyrus" + "rightmcggmiddlecingulategyrus", + "leftsogsuperioroccipitalgyrus" ] } }, @@ -2038,11 +2500,11 @@ "input": [ { "name": "x", - "value": "leftsmcsupplementarymotorcortex,leftliglingualgyrus,rightainsanteriorinsula" + "value": "leftpinsposteriorinsula,rightporgposteriororbitalgyrus,rightaorganteriororbitalgyrus,rightmpogpostcentralgyrusmedialsegment" }, { "name": "y", - "value": "rs3818361_t" + "value": "edsdcategory" }, { "name": "pathology", @@ -2050,26 +2512,36 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3818361_t\", \"field\": \"rs3818361_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs3818361_t\", \"field\": \"rs3818361_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "MCI" + }, + { + "name": "negative_level", + "value": "AD" } ], "output": { - "n_obs": 892, + "n_obs": 217, "coeff": [ - -3.6741595214089533, - -0.03704734121393591, - 0.003608038480938038, - 0.2640064044746655 + -2.6489770181321433, + 0.10215456729038339, + 0.3417920274662459, + 2.0784730761050576, + -2.3776397915958727 ], "coeff_names": [ "Intercept", - "leftsmcsupplementarymotorcortex", - "leftliglingualgyrus", - "rightainsanteriorinsula" + "leftpinsposteriorinsula", + "rightporgposteriororbitalgyrus", + "rightaorganteriororbitalgyrus", + "rightmpogpostcentralgyrusmedialsegment" ] } }, @@ -2077,11 +2549,11 @@ "input": [ { "name": "x", - "value": "rightainsanteriorinsula,leftcalccalcarinecortex,rightcuncuneus,leftthalamusproper,rightaccumbensarea,rightpcggposteriorcingulategyrus,leftlorglateralorbitalgyrus,rightmprgprecentralgyrusmedialsegment,subjectageyears,rightfugfusiformgyrus" + "value": "leftsmcsupplementarymotorcortex,rightacgganteriorcingulategyrus,rightcaudate" }, { "name": "y", - "value": "rs11767557_c" + "value": "neurodegenerativescategories" }, { "name": "pathology", @@ -2089,40 +2561,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs11767557_c\", \"field\": \"rs11767557_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs11767557_c\", \"field\": \"rs11767557_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "MCI" + }, + { + "name": "negative_level", + "value": "AD" } ], "output": { - "n_obs": 885, + "n_obs": 217, "coeff": [ - -0.9047515795959362, - 0.7755595444159604, - 0.6163983333750804, - 
-0.6494812948477129, - -1.438708247619076, - 6.879598147289463, - -0.26494114126610074, - -0.21385904271597334, - 0.7224802922502135, - 0.005063546836259594, - 0.5023612234934067 + -3.5597030062794244, + -0.17600546042639276, + 0.5235943356424313, + 0.6323695854613637 ], "coeff_names": [ "Intercept", - "rightainsanteriorinsula", - "leftcalccalcarinecortex", - "rightcuncuneus", - "leftthalamusproper", - "rightaccumbensarea", - "rightpcggposteriorcingulategyrus", - "leftlorglateralorbitalgyrus", - "rightmprgprecentralgyrusmedialsegment", - "subjectageyears", - "rightfugfusiformgyrus" + "leftsmcsupplementarymotorcortex", + "rightacgganteriorcingulategyrus", + "rightcaudate" ] } }, @@ -2130,11 +2596,11 @@ "input": [ { "name": "x", - "value": "leftputamen,lefttmptemporalpole" + "value": "rightmcggmiddlecingulategyrus" }, { "name": "y", - "value": "rs3818361_t" + "value": "apoe4" }, { "name": "pathology", @@ -2142,24 +2608,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3818361_t\", \"field\": \"rs3818361_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs3818361_t\", \"field\": \"rs3818361_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 892, + "n_obs": 519, "coeff": [ - 1.0856882992232608, - -0.5470940862529146, - -0.15137341451869246 + -0.7070550978205009, + -0.09931920192685778 ], "coeff_names": [ "Intercept", - "leftputamen", - "lefttmptemporalpole" + "rightmcggmiddlecingulategyrus" ] } }, @@ -2167,11 +2639,11 @@ "input": [ { "name": "x", - "value": "rightaccumbensarea,leftscasubcallosalarea,rightsmcsupplementarymotorcortex" + "value": "rightcerebellumwhitematter" }, { "name": "y", - "value": "rs610932_a" + "value": "neurodegenerativescategories" }, { "name": "pathology", @@ -2179,26 +2651,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "AD" + }, + { + "name": "negative_level", + "value": "MCI" } ], "output": { - "n_obs": 889, + "n_obs": 217, "coeff": [ - -0.32855707932770906, - 5.655666690302862, - -3.265108460884385, - 0.1954799822478627 + -4.782691559702195, + 0.2947063795058659 ], "coeff_names": [ "Intercept", - "rightaccumbensarea", - "leftscasubcallosalarea", - "rightsmcsupplementarymotorcortex" + "rightcerebellumwhitematter" ] } }, @@ -2206,11 +2682,11 @@ "input": [ { "name": "x", - "value": "rightporgposteriororbitalgyrus" + "value": "leftsmcsupplementarymotorcortex,leftptplanumtemporale" }, { "name": "y", - "value": "rs17125944_c" + "value": "rs3764650_g" }, { "name": "pathology", @@ -2218,22 +2694,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": 
\"2\"}, {\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "0" + }, + { + "name": "negative_level", + "value": "2" } ], "output": { - "n_obs": 888, + "n_obs": 739, "coeff": [ - -8.16919715226454, - 1.5186711670804698 + -5.633306016876095, + 0.5064231840532565, + -0.2802193605499415 ], "coeff_names": [ "Intercept", - "rightporgposteriororbitalgyrus" + "leftsmcsupplementarymotorcortex", + "leftptplanumtemporale" ] } }, @@ -2241,11 +2727,11 @@ "input": [ { "name": "x", - "value": "leftsogsuperioroccipitalgyrus,fdg,leftaorganteriororbitalgyrus,rightcocentraloperculum" + "value": "leftententorhinalarea,leftstgsuperiortemporalgyrus" }, { "name": "y", - "value": "adnicategory" + "value": "rs1476679_c" }, { "name": "pathology", @@ -2257,24 +2743,28 @@ }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}, {\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"MCI\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 741, + "n_obs": 329, "coeff": [ - -10.375675987714096, - 0.5497501358544883, - 1.7569071001691676, - 0.5524182652522868, - -0.3831362021266503 + -2.6441088395803476, + -0.013927883803547092, + 0.12295736587133572 ], "coeff_names": [ "Intercept", - "leftsogsuperioroccipitalgyrus", - "fdg", - "leftaorganteriororbitalgyrus", - "rightcocentraloperculum" + "leftententorhinalarea", + "leftstgsuperiortemporalgyrus" ] } }, @@ -2282,11 +2772,11 @@ "input": [ { "name": "x", - "value": "rightamygdala,montrealcognitiveassessment,rightsmcsupplementarymotorcortex,rightcocentraloperculum" + "value": "leftocpoccipitalpole,leftacgganteriorcingulategyrus,rightofugoccipitalfusiformgyrus" }, { "name": "y", - "value": "rs3851179_a" + "value": "rs610932_a" }, { "name": "pathology", @@ -2294,28 +2784,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3851179_a\", \"field\": \"rs3851179_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs3851179_a\", \"field\": \"rs3851179_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 322, + "n_obs": 604, "coeff": [ - 0.40924860003751534, - -0.6109459104814191, - 0.07412311974233, - -0.2658743001591573, - -0.40895875243108587 + -0.21297074517859146, + -0.13412690812920372, + -0.15123000727151542, + 0.04901847162936337 ], "coeff_names": [ "Intercept", - "rightamygdala", - "montrealcognitiveassessment", - "rightsmcsupplementarymotorcortex", - "rightcocentraloperculum" + "leftocpoccipitalpole", + "leftacgganteriorcingulategyrus", + "rightofugoccipitalfusiformgyrus" ] } }, @@ -2323,11 +2819,11 @@ "input": [ { "name": "x", - "value": "righttrifgtriangularpartoftheinferiorfrontalgyrus,leftopifgopercularpartoftheinferiorfrontalgyrus" + "value": "leftfrpfrontalpole" }, { "name": "y", - "value": 
"rs2718058_g" + "value": "rs11136000_t" }, { "name": "pathology", @@ -2335,24 +2831,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs2718058_g\", \"field\": \"rs2718058_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs2718058_g\", \"field\": \"rs2718058_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 552, + "n_obs": 440, "coeff": [ - -3.103983288888956, - 0.2981855299069117, - 0.2609097070304548 + -0.5847662560670841, + -0.09250046321070957 ], "coeff_names": [ "Intercept", - "righttrifgtriangularpartoftheinferiorfrontalgyrus", - "leftopifgopercularpartoftheinferiorfrontalgyrus" + "leftfrpfrontalpole" ] } }, @@ -2360,11 +2862,11 @@ "input": [ { "name": "x", - "value": "rightofugoccipitalfusiformgyrus" + "value": "rightsmgsupramarginalgyrus" }, { "name": "y", - "value": "rs744373_c" + "value": "alzheimerbroadcategory" }, { "name": "pathology", @@ -2372,22 +2874,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs744373_c\", \"field\": \"rs744373_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs744373_c\", \"field\": \"rs744373_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "CN" + }, + { + "name": "negative_level", + "value": "AD" } ], "output": { - "n_obs": 838, + "n_obs": 490, "coeff": [ - -0.35578046179508094, - -0.2703367774753837 + -5.027795116587171, + 0.6789666016060145 ], "coeff_names": [ "Intercept", - "rightofugoccipitalfusiformgyrus" + "rightsmgsupramarginalgyrus" ] } }, @@ -2395,11 +2905,11 @@ "input": [ { "name": "x", - "value": "leftptplanumtemporale,rightmsfgsuperiorfrontalgyrusmedialsegment,rightamygdala" + "value": "rightmprgprecentralgyrusmedialsegment" }, { "name": "y", - "value": "apoe4" + "value": "rs17125944_c" }, { "name": "pathology", @@ -2407,26 +2917,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"apoe4\", \"field\": \"apoe4\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"apoe4\", \"field\": \"apoe4\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 1061, + "n_obs": 747, "coeff": [ - 0.4824562066760484, - -0.15802829534298043, - 0.49206955450112905, - -7.018670292202753 + -7.950061646004823, + 1.252714935135743 ], "coeff_names": [ "Intercept", - "leftptplanumtemporale", - "rightmsfgsuperiorfrontalgyrusmedialsegment", - "rightamygdala" + "rightmprgprecentralgyrusmedialsegment" ] } }, @@ -2434,11 +2948,11 @@ "input": [ { "name": "x", - "value": "leftputamen,leftopifgopercularpartoftheinferiorfrontalgyrus,leftgregyrusrectus" + "value": "leftamygdala,leftmogmiddleoccipitalgyrus" }, { "name": "y", - "value": "neurodegenerativescategories" + "value": "rs744373_c" 
}, { "name": "pathology", @@ -2446,26 +2960,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"neurodegenerativescategories\", \"field\": \"neurodegenerativescategories\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}, {\"id\": \"neurodegenerativescategories\", \"field\": \"neurodegenerativescategories\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"MCI\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 798, + "n_obs": 743, "coeff": [ - -2.7143728393806543, - 0.5550153325041097, - 0.576521383235635, - -0.13130836659496392 + 0.8983183229724705, + 0.20880298441954612, + -0.21290392735113814 ], "coeff_names": [ "Intercept", - "leftputamen", - "leftopifgopercularpartoftheinferiorfrontalgyrus", - "leftgregyrusrectus" + "leftamygdala", + "leftmogmiddleoccipitalgyrus" ] } }, @@ -2473,11 +2993,11 @@ "input": [ { "name": "x", - "value": "leftsmcsupplementarymotorcortex,leftptplanumtemporale,leftmorgmedialorbitalgyrus,rightputamen,rightmfgmiddlefrontalgyrus,rightsmgsupramarginalgyrus,rightmfcmedialfrontalcortex,leftfofrontaloperculum,leftocpoccipitalpole" + "value": "rightaorganteriororbitalgyrus,leftmorgmedialorbitalgyrus" }, { "name": "y", - "value": "adnicategory" + "value": "alzheimerbroadcategory" }, { "name": "pathology", @@ -2485,38 +3005,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}, {\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"MCI\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "Other" + }, + { + "name": "negative_level", + "value": "CN" } ], "output": { - "n_obs": 1066, + "n_obs": 1071, "coeff": [ - -4.7571037248546935, - -0.12134231118815325, - -0.41650577672420463, - 0.9562925315401533, - 0.186954973246826, - 0.0935422576525464, - 0.7668586476178273, - -2.2823575930835722, - -0.9687271870922376, - 0.3477942748719645 + 1.154043529128957, + -0.7798131882205266, + 0.1644383867895363 ], "coeff_names": [ "Intercept", - "leftsmcsupplementarymotorcortex", - "leftptplanumtemporale", - "leftmorgmedialorbitalgyrus", - "rightputamen", - "rightmfgmiddlefrontalgyrus", - "rightsmgsupramarginalgyrus", - "rightmfcmedialfrontalcortex", - "leftfofrontaloperculum", - "leftocpoccipitalpole" + "rightaorganteriororbitalgyrus", + "leftmorgmedialorbitalgyrus" ] } }, @@ -2524,11 +3038,11 @@ "input": [ { "name": "x", - "value": "leftainsanteriorinsula,rightpoparietaloperculum" + "value": "leftaorganteriororbitalgyrus,leftptplanumtemporale,rightliglingualgyrus,rightaorganteriororbitalgyrus,leftphgparahippocampalgyrus,leftfugfusiformgyrus" }, { "name": "y", - "value": "apoe4" + "value": "adnicategory" }, { "name": "pathology", @@ -2536,24 +3050,40 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"apoe4\", \"field\": \"apoe4\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"apoe4\", \"field\": \"apoe4\", \"type\": 
\"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "CN" + }, + { + "name": "negative_level", + "value": "AD" } ], "output": { - "n_obs": 1061, + "n_obs": 490, "coeff": [ - -1.0395009892243072, - 0.11811565236799391, - -0.28622876590494084 + -6.022224781079315, + -3.265371840241305, + -0.9240756830700755, + -1.5757060080738698, + 2.021877662554668, + 3.6410939350446934, + 1.6002366471520992 ], "coeff_names": [ "Intercept", - "leftainsanteriorinsula", - "rightpoparietaloperculum" + "leftaorganteriororbitalgyrus", + "leftptplanumtemporale", + "rightliglingualgyrus", + "rightaorganteriororbitalgyrus", + "leftphgparahippocampalgyrus", + "leftfugfusiformgyrus" ] } }, @@ -2561,11 +3091,11 @@ "input": [ { "name": "x", - "value": "leftptplanumtemporale,leftporgposteriororbitalgyrus" + "value": "leftscasubcallosalarea,rightmtgmiddletemporalgyrus,leftmprgprecentralgyrusmedialsegment,rightmsfgsuperiorfrontalgyrusmedialsegment" }, { "name": "y", - "value": "gender" + "value": "rs11767557_c" }, { "name": "pathology", @@ -2573,24 +3103,36 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"gender\", \"field\": \"gender\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"F\"}, {\"id\": \"gender\", \"field\": \"gender\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"M\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 1066, + "n_obs": 630, "coeff": [ - -9.724492178812481, - 2.121255409961503, - 2.3486831991410964 + -3.4658143348491732, + -1.3423341475227228, + 0.08718737744069219, + -1.2959854572194445, + 0.5793422740180213 ], "coeff_names": [ "Intercept", - "leftptplanumtemporale", - "leftporgposteriororbitalgyrus" + "leftscasubcallosalarea", + "rightmtgmiddletemporalgyrus", + "leftmprgprecentralgyrusmedialsegment", + "rightmsfgsuperiorfrontalgyrusmedialsegment" ] } }, @@ -2598,11 +3140,11 @@ "input": [ { "name": "x", - "value": "leftcalccalcarinecortex,leftppplanumpolare" + "value": "leftptplanumtemporale" }, { "name": "y", - "value": "neurodegenerativescategories" + "value": "alzheimerbroadcategory" }, { "name": "pathology", @@ -2610,24 +3152,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"neurodegenerativescategories\", \"field\": \"neurodegenerativescategories\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"MCI\"}, {\"id\": \"neurodegenerativescategories\", \"field\": \"neurodegenerativescategories\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "AD" + }, + { + "name": "negative_level", + "value": "CN" } ], "output": { - "n_obs": 798, + "n_obs": 782, "coeff": [ - -0.6388106971266839, - 0.5344396733872178, - -0.040476454047968215 + -1.313473084465156, + 0.7382755005120571 ], "coeff_names": [ "Intercept", - "leftcalccalcarinecortex", - "leftppplanumpolare" + "leftptplanumtemporale" ] } }, @@ -2635,11 +3183,11 @@ "input": [ { "name": "x", - "value": "righttmptemporalpole" + "value": "rightitginferiortemporalgyrus,rightventraldc,leftainsanteriorinsula" }, { 
"name": "y", - "value": "rs11136000_t" + "value": "rs17125944_c" }, { "name": "pathology", @@ -2647,22 +3195,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs11136000_t\", \"field\": \"rs11136000_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs11136000_t\", \"field\": \"rs11136000_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 891, + "n_obs": 747, "coeff": [ - -0.810600278254982, - -0.013542453555888005 + -12.008704287418318, + 0.22389666192719973, + 0.868635607461425, + 0.18540851130272407 ], "coeff_names": [ "Intercept", - "righttmptemporalpole" + "rightitginferiortemporalgyrus", + "rightventraldc", + "leftainsanteriorinsula" ] } }, @@ -2670,11 +3230,11 @@ "input": [ { "name": "x", - "value": "rightpcuprecuneus" + "value": "leftcocentraloperculum,rightlorglateralorbitalgyrus" }, { "name": "y", - "value": "agegroup" + "value": "rs744373_c" }, { "name": "pathology", @@ -2682,22 +3242,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"agegroup\", \"field\": \"agegroup\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"+80y\"}, {\"id\": \"agegroup\", \"field\": \"agegroup\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"60-69y\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 1066, + "n_obs": 506, "coeff": [ - -1.1283245280496264, - 0.11917964088107444 + 0.9952125364310419, + -0.09402002041472743, + -0.9494373150317408 ], "coeff_names": [ "Intercept", - "rightpcuprecuneus" + "leftcocentraloperculum", + "rightlorglateralorbitalgyrus" ] } }, @@ -2705,11 +3275,11 @@ "input": [ { "name": "x", - "value": "leftppplanumpolare,leftventraldc,rightptplanumtemporale" + "value": "leftmpogpostcentralgyrusmedialsegment,leftsfgsuperiorfrontalgyrus" }, { "name": "y", - "value": "rs190982_g" + "value": "agegroup" }, { "name": "pathology", @@ -2717,26 +3287,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs190982_g\", \"field\": \"rs190982_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs190982_g\", \"field\": \"rs190982_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "60-69y" + }, + { + "name": "negative_level", + "value": "50-59y" } ], "output": { - "n_obs": 536, + "n_obs": 902, "coeff": [ - 1.3591850964141365, - -1.653446750638958, - -0.34894354296715707, - 1.444932928288546 + 2.551116754216032, + 0.21795294533591933, + -0.12533124606083215 ], "coeff_names": [ "Intercept", - "leftppplanumpolare", - "leftventraldc", - "rightptplanumtemporale" + "leftmpogpostcentralgyrusmedialsegment", + "leftsfgsuperiorfrontalgyrus" ] } }, @@ -2744,11 +3320,11 @@ "input": [ { "name": "x", - "value": 
"rightcuncuneus,rightitginferiortemporalgyrus,leftsplsuperiorparietallobule,rightainsanteriorinsula,leftmtgmiddletemporalgyrus,leftfugfusiformgyrus,leftlorglateralorbitalgyrus" + "value": "rightsmgsupramarginalgyrus" }, { "name": "y", - "value": "rs3865444_t" + "value": "gender" }, { "name": "pathology", @@ -2756,34 +3332,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3865444_t\", \"field\": \"rs3865444_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs3865444_t\", \"field\": \"rs3865444_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "M" + }, + { + "name": "negative_level", + "value": "F" } ], "output": { - "n_obs": 892, + "n_obs": 1780, "coeff": [ - -3.5156570188912037, - -0.09391061607109098, - 0.06949142793699509, - -0.12942789648001482, - 0.21294948456119678, - 0.18121233455234248, - -0.30082884276341293, - 0.6636010969334062 + -5.369845891906666, + 0.7204365440251995 ], "coeff_names": [ "Intercept", - "rightcuncuneus", - "rightitginferiortemporalgyrus", - "leftsplsuperiorparietallobule", - "rightainsanteriorinsula", - "leftmtgmiddletemporalgyrus", - "leftfugfusiformgyrus", - "leftlorglateralorbitalgyrus" + "rightsmgsupramarginalgyrus" ] } }, @@ -2791,11 +3363,11 @@ "input": [ { "name": "x", - "value": "rightcuncuneus,rightcocentraloperculum,leftfofrontaloperculum,rightangangulargyrus,rightsfgsuperiorfrontalgyrus,rightsmgsupramarginalgyrus,rightprgprecentralgyrus" + "value": "leftententorhinalarea,leftliglingualgyrus,rightainsanteriorinsula,leftlorglateralorbitalgyrus,rightmorgmedialorbitalgyrus" }, { "name": "y", - "value": "rs3764650_g" + "value": "gender" }, { "name": "pathology", @@ -2803,34 +3375,38 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3764650_g\", \"field\": \"rs3764650_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs3764650_g\", \"field\": \"rs3764650_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "F" + }, + { + "name": "negative_level", + "value": "M" } ], "output": { - "n_obs": 892, + "n_obs": 2217, "coeff": [ - -5.662185052868965, - -0.8791689545137812, - -0.9602564530197412, - 1.4164270064670739, - -0.17963095206368224, - -0.281275418088527, - 1.1974515228876648, - 0.28809478915793574 + -12.838266044301383, + 1.4866847577240692, + 0.9921475326376672, + 2.4971638398168174, + -0.5761358162280062, + -1.4038191804096432 ], "coeff_names": [ "Intercept", - "rightcuncuneus", - "rightcocentraloperculum", - "leftfofrontaloperculum", - "rightangangulargyrus", - "rightsfgsuperiorfrontalgyrus", - "rightsmgsupramarginalgyrus", - "rightprgprecentralgyrus" + "leftententorhinalarea", + "leftliglingualgyrus", + "rightainsanteriorinsula", + "leftlorglateralorbitalgyrus", + "rightmorgmedialorbitalgyrus" ] } }, @@ -2838,11 +3414,11 @@ "input": [ { "name": "x", - "value": "rightmtgmiddletemporalgyrus,rightpcggposteriorcingulategyrus,rightmorgmedialorbitalgyrus" + "value": "rightlorglateralorbitalgyrus" }, { "name": "y", - "value": "rs17125944_c" + "value": "agegroup" }, { "name": "pathology", @@ -2850,26 +3426,30 
@@ }, { "name": "dataset", - "value": "adni" + "value": "edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "-50y" + }, + { + "name": "negative_level", + "value": "60-69y" } ], "output": { - "n_obs": 888, + "n_obs": 198, "coeff": [ - -9.737113464137376, - 0.5037736114832001, - -0.6919616363776332, - 0.21821777717763866 + -2.9649281688521216, + 1.5436220226428057 ], "coeff_names": [ "Intercept", - "rightmtgmiddletemporalgyrus", - "rightpcggposteriorcingulategyrus", - "rightmorgmedialorbitalgyrus" + "rightlorglateralorbitalgyrus" ] } }, @@ -2877,11 +3457,11 @@ "input": [ { "name": "x", - "value": "subjectage,rightsmgsupramarginalgyrus,leftcaudate,leftmsfgsuperiorfrontalgyrusmedialsegment,rightmprgprecentralgyrusmedialsegment,rightofugoccipitalfusiformgyrus,rightlorglateralorbitalgyrus" + "value": "leftfrpfrontalpole,rightaorganteriororbitalgyrus,rightsfgsuperiorfrontalgyrus,rightitginferiortemporalgyrus,rightaccumbensarea,rightioginferioroccipitalgyrus,rightthalamusproper,rightstgsuperiortemporalgyrus" }, { "name": "y", - "value": "rs1476679_c" + "value": "adnicategory" }, { "name": "pathology", @@ -2893,30 +3473,40 @@ }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs1476679_c\", \"field\": \"rs1476679_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs1476679_c\", \"field\": \"rs1476679_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "AD" + }, + { + "name": "negative_level", + "value": "CN" } ], "output": { - "n_obs": 535, + "n_obs": 490, "coeff": [ - -5.365033246555066, - 0.030518789664660987, - 0.2186797018246616, - -0.08547296034271173, - -0.20595890524244448, - 0.6167951804880417, - -0.06154995933194874, - -0.027937011762284754 + -5.711193286044684, + -3.098576656531056, + 2.0462801575339267, + -0.2953023362920214, + 0.8658272972688227, + 1.5873279902449762, + 0.34378567888708295, + 0.24080950101412874, + 0.5693567415723443 ], "coeff_names": [ "Intercept", - "subjectage", - "rightsmgsupramarginalgyrus", - "leftcaudate", - "leftmsfgsuperiorfrontalgyrusmedialsegment", - "rightmprgprecentralgyrusmedialsegment", - "rightofugoccipitalfusiformgyrus", - "rightlorglateralorbitalgyrus" + "leftfrpfrontalpole", + "rightaorganteriororbitalgyrus", + "rightsfgsuperiorfrontalgyrus", + "rightitginferiortemporalgyrus", + "rightaccumbensarea", + "rightioginferioroccipitalgyrus", + "rightthalamusproper", + "rightstgsuperiortemporalgyrus" ] } }, @@ -2924,11 +3514,11 @@ "input": [ { "name": "x", - "value": "rightpoparietaloperculum,lefttmptemporalpole,rightcaudate,leftaorganteriororbitalgyrus" + "value": "rightttgtransversetemporalgyrus" }, { "name": "y", - "value": "alzheimerbroadcategory" + "value": "agegroup" }, { "name": "pathology", @@ -2936,28 +3526,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"alzheimerbroadcategory\", \"field\": \"alzheimerbroadcategory\", \"type\": \"string\", \"input\": \"text\", 
\"operator\": \"equal\", \"value\": \"Other\"}, {\"id\": \"alzheimerbroadcategory\", \"field\": \"alzheimerbroadcategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "-50y" + }, + { + "name": "negative_level", + "value": "70-79y" } ], "output": { - "n_obs": 1066, + "n_obs": 482, "coeff": [ - -3.270489680663836, - 0.3041833580600895, - 0.7503467638071595, - -0.5146772376319071, - -0.1066923783124116 + 1.4034565719190506, + -0.4894077819658199 ], "coeff_names": [ "Intercept", - "rightpoparietaloperculum", - "lefttmptemporalpole", - "rightcaudate", - "leftaorganteriororbitalgyrus" + "rightttgtransversetemporalgyrus" ] } }, @@ -2965,11 +3557,11 @@ "input": [ { "name": "x", - "value": "leftcalccalcarinecortex" + "value": "rightententorhinalarea,rightfrpfrontalpole,rightporgposteriororbitalgyrus,rightcocentraloperculum,rightmfgmiddlefrontalgyrus" }, { "name": "y", - "value": "apoe4" + "value": "edsdcategory" }, { "name": "pathology", @@ -2977,22 +3569,38 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"apoe4\", \"field\": \"apoe4\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"apoe4\", \"field\": \"apoe4\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "CN" + }, + { + "name": "negative_level", + "value": "AD" } ], "output": { - "n_obs": 1061, + "n_obs": 292, "coeff": [ - -2.4230626423595214, - 0.2982880140962524 + -6.75265305410533, + 9.520079242930079, + -2.1469519520828046, + -0.006814225306057821, + -2.2886934162322983, + 0.47289093086662026 ], "coeff_names": [ "Intercept", - "leftcalccalcarinecortex" + "rightententorhinalarea", + "rightfrpfrontalpole", + "rightporgposteriororbitalgyrus", + "rightcocentraloperculum", + "rightmfgmiddlefrontalgyrus" ] } }, @@ -3000,11 +3608,11 @@ "input": [ { "name": "x", - "value": "rightopifgopercularpartoftheinferiorfrontalgyrus,rightfofrontaloperculum" + "value": "leftmfcmedialfrontalcortex" }, { "name": "y", - "value": "rs3865444_t" + "value": "apoe4" }, { "name": "pathology", @@ -3016,20 +3624,26 @@ }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3865444_t\", \"field\": \"rs3865444_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs3865444_t\", \"field\": \"rs3865444_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 892, + "n_obs": 937, "coeff": [ - -2.6446327806133274, - -0.07004415188952656, - 0.5412329634013252 + 0.20210582570311905, + -0.2929613793123545 ], "coeff_names": [ "Intercept", - "rightopifgopercularpartoftheinferiorfrontalgyrus", - "rightfofrontaloperculum" + "leftmfcmedialfrontalcortex" ] } }, @@ -3037,11 +3651,11 @@ "input": [ { "name": "x", - "value": "rightacgganteriorcingulategyrus" + "value": "rightfugfusiformgyrus,leftioginferioroccipitalgyrus" }, { "name": "y", - "value": "adnicategory" + "value": "neurodegenerativescategories" }, { "name": "pathology", @@ -3049,22 +3663,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi" }, { 
"name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"CN\"}, {\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "PD" + }, + { + "name": "negative_level", + "value": "MCI" } ], "output": { - "n_obs": 1066, + "n_obs": 981, "coeff": [ - 0.9934745163930405, - -0.19854753918237558 + -7.957127050594371, + 0.9394183737527463, + 0.09235380587822516 ], "coeff_names": [ "Intercept", - "rightacgganteriorcingulategyrus" + "rightfugfusiformgyrus", + "leftioginferioroccipitalgyrus" ] } }, @@ -3072,7 +3696,7 @@ "input": [ { "name": "x", - "value": "rightsmcsupplementarymotorcortex,rightitginferiortemporalgyrus,rightpinsposteriorinsula" + "value": "leftfofrontaloperculum" }, { "name": "y", @@ -3088,22 +3712,26 @@ }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs1476679_c\", \"field\": \"rs1476679_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs1476679_c\", \"field\": \"rs1476679_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 535, + "n_obs": 488, "coeff": [ - -1.1481005815308665, - 0.4516359918433529, - -0.05873714822982642, - -0.39597677871185977 + -1.6165224132551221, + 0.6633283833493653 ], "coeff_names": [ "Intercept", - "rightsmcsupplementarymotorcortex", - "rightitginferiortemporalgyrus", - "rightpinsposteriorinsula" + "leftfofrontaloperculum" ] } }, @@ -3111,11 +3739,11 @@ "input": [ { "name": "x", - "value": "rightfugfusiformgyrus,leftaccumbensarea,rightgregyrusrectus,rightsmgsupramarginalgyrus,leftpcggposteriorcingulategyrus,righttmptemporalpole" + "value": "leftprgprecentralgyrus,rightcaudate,leftmpogpostcentralgyrusmedialsegment" }, { "name": "y", - "value": "rs610932_a" + "value": "rs10498633_t" }, { "name": "pathology", @@ -3127,28 +3755,30 @@ }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "2" } ], "output": { - "n_obs": 889, + "n_obs": 333, "coeff": [ - 1.0817683715917932, - 0.003203914188593758, - -5.740379146142796, - 0.28190207786444554, - 0.14809395749442622, - -0.21326542329980816, - 0.1239675237833748 + -4.014404492147296, + 0.07898624695177157, + 0.137662027546091, + 0.606442589330398 ], "coeff_names": [ "Intercept", - "rightfugfusiformgyrus", - "leftaccumbensarea", - "rightgregyrusrectus", - "rightsmgsupramarginalgyrus", - "leftpcggposteriorcingulategyrus", - "righttmptemporalpole" + "leftprgprecentralgyrus", + "rightcaudate", + "leftmpogpostcentralgyrusmedialsegment" ] } }, @@ -3156,11 +3786,11 @@ "input": [ { "name": "x", - "value": "rightsmgsupramarginalgyrus,rightmprgprecentralgyrusmedialsegment" + "value": 
"leftventraldc,minimentalstate,rightofugoccipitalfusiformgyrus,leftppplanumpolare,leftocpoccipitalpole,lefttrifgtriangularpartoftheinferiorfrontalgyrus,leftsmgsupramarginalgyrus,leftphgparahippocampalgyrus,leftmtgmiddletemporalgyrus" }, { "name": "y", - "value": "rs610932_a" + "value": "agegroup" }, { "name": "pathology", @@ -3168,24 +3798,46 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "50-59y" + }, + { + "name": "negative_level", + "value": "70-79y" } ], "output": { - "n_obs": 889, + "n_obs": 588, "coeff": [ - -0.2354393487674956, - -0.05882499596841113, - 0.007972222181104579 + 2.5881695814319103, + -1.0794623919777906, + 0.11947740607543068, + -0.9789476181353883, + 3.287996586379003, + 0.49353595020778457, + -0.3007807003908169, + 0.11615323061577623, + -2.3468385087525014, + 0.3191166172083576 ], "coeff_names": [ "Intercept", - "rightsmgsupramarginalgyrus", - "rightmprgprecentralgyrusmedialsegment" + "leftventraldc", + "minimentalstate", + "rightofugoccipitalfusiformgyrus", + "leftppplanumpolare", + "leftocpoccipitalpole", + "lefttrifgtriangularpartoftheinferiorfrontalgyrus", + "leftsmgsupramarginalgyrus", + "leftphgparahippocampalgyrus", + "leftmtgmiddletemporalgyrus" ] } }, @@ -3193,11 +3845,11 @@ "input": [ { "name": "x", - "value": "leftttgtransversetemporalgyrus,leftsogsuperioroccipitalgyrus" + "value": "leftgregyrusrectus" }, { "name": "y", - "value": "neurodegenerativescategories" + "value": "rs3851179_a" }, { "name": "pathology", @@ -3205,24 +3857,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,ppmi,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"neurodegenerativescategories\", \"field\": \"neurodegenerativescategories\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"MCI\"}, {\"id\": \"neurodegenerativescategories\", \"field\": \"neurodegenerativescategories\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 798, + "n_obs": 501, "coeff": [ - -2.3946924036724186, - -0.5177547703635993, - 1.2017920885178277 + 1.633136642878292, + -1.4361208673621937 ], "coeff_names": [ "Intercept", - "leftttgtransversetemporalgyrus", - "leftsogsuperioroccipitalgyrus" + "leftgregyrusrectus" ] } }, @@ -3230,11 +3888,11 @@ "input": [ { "name": "x", - "value": "leftlorglateralorbitalgyrus,rightpcuprecuneus" + "value": "subjectage" }, { "name": "y", - "value": "rs3865444_t" + "value": "rs3851179_a" }, { "name": "pathology", @@ -3242,24 +3900,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3865444_t\", \"field\": \"rs3865444_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs3865444_t\", \"field\": \"rs3865444_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], 
\"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 892, + "n_obs": 499, "coeff": [ - 0.12191334716115211, - 0.5922570190059887, - -0.1595671930097825 + -2.740181031107059, + 0.019581888845113422 ], "coeff_names": [ "Intercept", - "leftlorglateralorbitalgyrus", - "rightpcuprecuneus" + "subjectage" ] } }, @@ -3267,11 +3931,11 @@ "input": [ { "name": "x", - "value": "minimentalstate,rightputamen,rightofugoccipitalfusiformgyrus,leftofugoccipitalfusiformgyrus" + "value": "leftphgparahippocampalgyrus,rightprgprecentralgyrus" }, { "name": "y", - "value": "rs610932_a" + "value": "rs3851179_a" }, { "name": "pathology", @@ -3279,28 +3943,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 889, + "n_obs": 501, "coeff": [ - 1.1544630537996377, - 0.017182919723200893, - -0.6329647770962211, - -0.022194782074190178, - 0.3140811962802812 + 0.9218370892205258, + -0.4121830475819115, + -0.07979597543588916 ], "coeff_names": [ "Intercept", - "minimentalstate", - "rightputamen", - "rightofugoccipitalfusiformgyrus", - "leftofugoccipitalfusiformgyrus" + "leftphgparahippocampalgyrus", + "rightprgprecentralgyrus" ] } }, @@ -3308,11 +3976,11 @@ "input": [ { "name": "x", - "value": "leftpogpostcentralgyrus" + "value": "leftstgsuperiortemporalgyrus" }, { "name": "y", - "value": "apoe4" + "value": "rs3764650_g" }, { "name": "pathology", @@ -3320,22 +3988,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"apoe4\", \"field\": \"apoe4\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"apoe4\", \"field\": \"apoe4\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 1061, + "n_obs": 739, "coeff": [ - 0.032295486710389586, - -0.030186850011402627 + -6.1494052082198145, + 0.3749499464123788 ], "coeff_names": [ "Intercept", - "leftpogpostcentralgyrus" + "leftstgsuperiortemporalgyrus" ] } }, @@ -3343,11 +4019,11 @@ "input": [ { "name": "x", - "value": "rightmtgmiddletemporalgyrus,rightorifgorbitalpartoftheinferiorfrontalgyrus" + "value": "leftmorgmedialorbitalgyrus,rightfrpfrontalpole" }, { "name": "y", - "value": "rs2718058_g" + "value": "apoe4" }, { "name": "pathology", @@ -3355,24 +4031,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs2718058_g\", \"field\": \"rs2718058_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs2718058_g\", \"field\": \"rs2718058_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": 
"" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 552, + "n_obs": 666, "coeff": [ - -1.7092209387887292, - -0.05165907942782197, - 0.7726066120504289 + -1.0894418226613758, + -0.32491476893556637, + 0.22978589833448845 ], "coeff_names": [ "Intercept", - "rightmtgmiddletemporalgyrus", - "rightorifgorbitalpartoftheinferiorfrontalgyrus" + "leftmorgmedialorbitalgyrus", + "rightfrpfrontalpole" ] } }, @@ -3380,11 +4064,11 @@ "input": [ { "name": "x", - "value": "rightioginferioroccipitalgyrus,leftmcggmiddlecingulategyrus,leftscasubcallosalarea,leftfofrontaloperculum" + "value": "leftcalccalcarinecortex" }, { "name": "y", - "value": "rs3818361_t" + "value": "rs3851179_a" }, { "name": "pathology", @@ -3396,24 +4080,26 @@ }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs3818361_t\", \"field\": \"rs3818361_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs3818361_t\", \"field\": \"rs3818361_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "0" } ], "output": { - "n_obs": 892, + "n_obs": 784, "coeff": [ - -2.567327238034728, - 0.015644399901791634, - -0.2167977927321555, - 0.2941543424132473, - 0.1765894953916882 + -1.001982420437677, + 0.32026493186498234 ], "coeff_names": [ "Intercept", - "rightioginferioroccipitalgyrus", - "leftmcggmiddlecingulategyrus", - "leftscasubcallosalarea", - "leftfofrontaloperculum" + "leftcalccalcarinecortex" ] } }, @@ -3421,11 +4107,11 @@ "input": [ { "name": "x", - "value": "righthippocampus,rightlorglateralorbitalgyrus,righttrifgtriangularpartoftheinferiorfrontalgyrus" + "value": "rightaorganteriororbitalgyrus" }, { "name": "y", - "value": "rs610932_a" + "value": "dataset" }, { "name": "pathology", @@ -3433,26 +4119,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "ppmi" + }, + { + "name": "negative_level", + "value": "edsd" } ], "output": { - "n_obs": 889, + "n_obs": 1151, "coeff": [ - -0.25104641388016213, - -0.16077930928352, - -0.05748033234008986, - 0.05890972600908001 + -2.3423990907450056, + 1.6219908456558503 ], "coeff_names": [ "Intercept", - "righthippocampus", - "rightlorglateralorbitalgyrus", - "righttrifgtriangularpartoftheinferiorfrontalgyrus" + "rightaorganteriororbitalgyrus" ] } }, @@ -3460,11 +4150,11 @@ "input": [ { "name": "x", - "value": "leftputamen" + "value": "rightbasalforebrain,csfglobal" }, { "name": "y", - "value": "adnicategory" + "value": "agegroup" }, { "name": "pathology", @@ -3472,22 +4162,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"AD\"}, {\"id\": \"adnicategory\", \"field\": \"adnicategory\", \"type\": \"string\", 
\"input\": \"text\", \"operator\": \"equal\", \"value\": \"CN\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "-50y" + }, + { + "name": "negative_level", + "value": "50-59y" } ], "output": { - "n_obs": 1066, + "n_obs": 265, "coeff": [ - -3.2868906586745883, - 0.9293365799310862 + 2.9147943180925333, + -11.496492724294455, + 2.2516572660066934 ], "coeff_names": [ "Intercept", - "leftputamen" + "rightbasalforebrain", + "csfglobal" ] } }, @@ -3495,11 +4195,11 @@ "input": [ { "name": "x", - "value": "rightpcuprecuneus" + "value": "leftmogmiddleoccipitalgyrus,rightphgparahippocampalgyrus,rightventraldc,rightptplanumtemporale,leftpcuprecuneus,leftpinsposteriorinsula" }, { "name": "y", - "value": "rs190982_g" + "value": "dataset" }, { "name": "pathology", @@ -3507,22 +4207,40 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs190982_g\", \"field\": \"rs190982_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs190982_g\", \"field\": \"rs190982_g\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "ppmi" + }, + { + "name": "negative_level", + "value": "edsd" } ], "output": { - "n_obs": 536, + "n_obs": 1151, "coeff": [ - -0.3249925110553894, - -0.0856101486704768 + -6.732937476458674, + 0.4910519078959502, + 2.1867032436297005, + -0.49293769460419967, + -0.5815751212516206, + 0.3191090160121539, + -0.94531884586308 ], "coeff_names": [ "Intercept", - "rightpcuprecuneus" + "leftmogmiddleoccipitalgyrus", + "rightphgparahippocampalgyrus", + "rightventraldc", + "rightptplanumtemporale", + "leftpcuprecuneus", + "leftpinsposteriorinsula" ] } }, @@ -3530,11 +4248,11 @@ "input": [ { "name": "x", - "value": "rightainsanteriorinsula,righttrifgtriangularpartoftheinferiorfrontalgyrus,rightmorgmedialorbitalgyrus" + "value": "rightainsanteriorinsula,leftmprgprecentralgyrusmedialsegment,leftmtgmiddletemporalgyrus" }, { "name": "y", - "value": "apoe4" + "value": "rs744373_c" }, { "name": "pathology", @@ -3542,26 +4260,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"apoe4\", \"field\": \"apoe4\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"apoe4\", \"field\": \"apoe4\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "2" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 1061, + "n_obs": 427, "coeff": [ - -1.092408034327955, - -0.13542525729627022, - -0.05002413057305232, - 0.09129146807891558 + -0.4006937471283802, + -0.005875137499075411, + -0.2253098316997281, + -0.01868043099689318 ], "coeff_names": [ "Intercept", "rightainsanteriorinsula", - "righttrifgtriangularpartoftheinferiorfrontalgyrus", - "rightmorgmedialorbitalgyrus" + "leftmprgprecentralgyrusmedialsegment", + "leftmtgmiddletemporalgyrus" ] } }, @@ -3569,11 +4295,11 @@ "input": [ { "name": "x", - "value": "rightporgposteriororbitalgyrus,leftmpogpostcentralgyrusmedialsegment" + "value": "leftlateralventricle,leftpinsposteriorinsula,leftfrpfrontalpole" }, { "name": "y", - "value": "rs744373_c" + "value": "alzheimerbroadcategory" }, { "name": 
"pathology", @@ -3581,24 +4307,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs744373_c\", \"field\": \"rs744373_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs744373_c\", \"field\": \"rs744373_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "AD" + }, + { + "name": "negative_level", + "value": "CN" } ], "output": { - "n_obs": 838, + "n_obs": 292, "coeff": [ - -0.4834140025735661, - -0.08339097519553344, - -0.5208082871485453 + -2.1318583064481653, + -0.15858340142111682, + 0.4154178211346591, + 1.2988546967327264 ], "coeff_names": [ "Intercept", - "rightporgposteriororbitalgyrus", - "leftmpogpostcentralgyrusmedialsegment" + "leftlateralventricle", + "leftpinsposteriorinsula", + "leftfrpfrontalpole" ] } }, @@ -3606,11 +4342,11 @@ "input": [ { "name": "x", - "value": "rightorifgorbitalpartoftheinferiorfrontalgyrus,rightpoparietaloperculum" + "value": "rightfofrontaloperculum,rightcocentraloperculum,leftitginferiortemporalgyrus" }, { "name": "y", - "value": "gender" + "value": "rs10498633_t" }, { "name": "pathology", @@ -3618,24 +4354,34 @@ }, { "name": "dataset", - "value": "adni" + "value": "adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"gender\", \"field\": \"gender\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"F\"}, {\"id\": \"gender\", \"field\": \"gender\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"M\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "1" + }, + { + "name": "negative_level", + "value": "2" } ], "output": { - "n_obs": 1066, + "n_obs": 333, "coeff": [ - -8.147850707857732, - 3.658142419724742, - 1.2941470713567442 + -1.5633277790118707, + 0.058055392025252395, + -0.02479689242321835, + -0.03139170078217617 ], "coeff_names": [ "Intercept", - "rightorifgorbitalpartoftheinferiorfrontalgyrus", - "rightpoparietaloperculum" + "rightfofrontaloperculum", + "rightcocentraloperculum", + "leftitginferiortemporalgyrus" ] } }, @@ -3643,11 +4389,11 @@ "input": [ { "name": "x", - "value": "leftacgganteriorcingulategyrus" + "value": "leftsogsuperioroccipitalgyrus,rightaccumbensarea" }, { "name": "y", - "value": "rs11136000_t" + "value": "rs11767557_c" }, { "name": "pathology", @@ -3655,22 +4401,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,adni,ppmi" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs11136000_t\", \"field\": \"rs11136000_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}, {\"id\": \"rs11136000_t\", \"field\": \"rs11136000_t\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "0" + }, + { + "name": "negative_level", + "value": "2" } ], "output": { - "n_obs": 891, + "n_obs": 630, "coeff": [ - 1.4186192014038075, - -0.23123887581619618 + -3.1273966661832526, + -0.5322760408955276, + 6.085629859071347 ], "coeff_names": [ "Intercept", - "leftacgganteriorcingulategyrus" + "leftsogsuperioroccipitalgyrus", + "rightaccumbensarea" ] } }, @@ -3678,11 +4434,11 @@ "input": [ { "name": "x", - "value": "leftporgposteriororbitalgyrus" + 
"value": "leftventraldc" }, { "name": "y", - "value": "rs11767557_c" + "value": "ppmicategory" }, { "name": "pathology", @@ -3690,22 +4446,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "ppmi,edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs11767557_c\", \"field\": \"rs11767557_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs11767557_c\", \"field\": \"rs11767557_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "HC" + }, + { + "name": "negative_level", + "value": "PD" } ], "output": { - "n_obs": 885, + "n_obs": 588, "coeff": [ - -4.859813115957905, - 0.8823708046921395 + -1.1474019532920952, + 0.4026131882967269 ], "coeff_names": [ "Intercept", - "leftporgposteriororbitalgyrus" + "leftventraldc" ] } }, @@ -3713,11 +4477,11 @@ "input": [ { "name": "x", - "value": "rightangangulargyrus" + "value": "subjectageyears,leftpcggposteriorcingulategyrus" }, { "name": "y", - "value": "rs17125944_c" + "value": "gender" }, { "name": "pathology", @@ -3725,22 +4489,32 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs17125944_c\", \"field\": \"rs17125944_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"0\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "M" + }, + { + "name": "negative_level", + "value": "F" } ], "output": { - "n_obs": 888, + "n_obs": 368, "coeff": [ - -11.317316120805526, - 0.6418497656843416 + -12.11086909211381, + 0.012403393782012011, + 2.6108780721921945 ], "coeff_names": [ "Intercept", - "rightangangulargyrus" + "subjectageyears", + "leftpcggposteriorcingulategyrus" ] } }, @@ -3748,11 +4522,11 @@ "input": [ { "name": "x", - "value": "rightofugoccipitalfusiformgyrus,rightpallidum,leftainsanteriorinsula,leftppplanumpolare" + "value": "leftpogpostcentralgyrus" }, { "name": "y", - "value": "rs610932_a" + "value": "rs190982_g" }, { "name": "pathology", @@ -3764,24 +4538,26 @@ }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}, {\"id\": \"rs610932_a\", \"field\": \"rs610932_a\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "0" + }, + { + "name": "negative_level", + "value": "1" } ], "output": { - "n_obs": 889, + "n_obs": 465, "coeff": [ - -0.35782794033074633, - -0.16913668993225273, - 1.0091154728118277, - -0.26034764460379195, - -0.23210632394910083 + -0.13726812671560204, + 0.022161738099643027 ], "coeff_names": [ "Intercept", - "rightofugoccipitalfusiformgyrus", - "rightpallidum", - "leftainsanteriorinsula", - "leftppplanumpolare" + "leftpogpostcentralgyrus" ] } }, @@ -3789,11 +4565,11 @@ "input": [ { "name": "x", - "value": "leftangangulargyrus,rightioginferioroccipitalgyrus,leftgregyrusrectus,rightptplanumtemporale,leftphgparahippocampalgyrus,rightocpoccipitalpole,rightpoparietaloperculum" + "value": "rightpallidum" }, { "name": "y", - "value": "rs1476679_c" + "value": 
"parkinsonbroadcategory" }, { "name": "pathology", @@ -3801,34 +4577,30 @@ }, { "name": "dataset", - "value": "adni" + "value": "edsd,ppmi,adni" }, { "name": "filter", - "value": "{\"condition\": \"OR\", \"rules\": [{\"id\": \"rs1476679_c\", \"field\": \"rs1476679_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"1\"}, {\"id\": \"rs1476679_c\", \"field\": \"rs1476679_c\", \"type\": \"string\", \"input\": \"text\", \"operator\": \"equal\", \"value\": \"2\"}], \"valid\": true}" + "value": "" + }, + { + "name": "positive_level", + "value": "Other" + }, + { + "name": "negative_level", + "value": "CN" } ], "output": { - "n_obs": 535, + "n_obs": 309, "coeff": [ - -1.791930950430731, - -0.004797369765243506, - -0.5435253757150752, - -1.3270518120828125, - -2.3830897337180903, - 1.9661296343411743, - 0.0014719830951008502, - 2.3057220555418763 + 1.6312974384749883, + -1.4003791020047216 ], "coeff_names": [ "Intercept", - "leftangangulargyrus", - "rightioginferioroccipitalgyrus", - "leftgregyrusrectus", - "rightptplanumtemporale", - "leftphgparahippocampalgyrus", - "rightocpoccipitalpole", - "rightpoparietaloperculum" + "rightpallidum" ] } } diff --git a/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/test_anova.py b/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/test_anova.py new file mode 100644 index 0000000000000000000000000000000000000000..c00eed8e21d542891e9cbfdfb5b85386983eb502 --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/test_anova.py @@ -0,0 +1,46 @@ +import pytest +from pathlib import Path +import numpy as np + +from mipframework.testutils import get_test_params, get_algorithm_result +from ANOVA_ONEWAY import Anova + +expected_file = Path(__file__).parent / "expected" / "anova_expected.json" + + +@pytest.mark.parametrize( + "test_input, expected", get_test_params(expected_file, slice(80)) +) +def test_anova_algorithm_local(test_input, expected): + result = get_algorithm_result(Anova, test_input, num_workers=1) + aov = result["anova_table"] + tukey = result["tukey_table"] + e_aov = {k: v for k, v in expected.items() if k != "tukey_test"} + e_tukey = expected["tukey_test"] + assert set(e_aov) == set(aov.keys()) + for key, e_val in e_aov.items(): + r_val = aov[key] + assert np.isclose(e_val, r_val) + for et, rt in zip(e_tukey, tukey): + for key, e_val in et.items(): + r_val = rt[key] + assert e_val == r_val or np.isclose(e_val, r_val) + + +@pytest.mark.parametrize( + "test_input, expected", get_test_params(expected_file, slice(80, 95)) +) +def test_anova_algorithm_federated(test_input, expected): + result = get_algorithm_result(Anova, test_input, num_workers=10) + aov = result["anova_table"] + tukey = result["tukey_table"] + e_aov = {k: v for k, v in expected.items() if k != "tukey_test"} + e_tukey = expected["tukey_test"] + assert set(e_aov) == set(aov.keys()) + for key, e_val in e_aov.items(): + r_val = aov[key] + assert np.isclose(e_val, r_val) + for et, rt in zip(e_tukey, tukey): + for key, e_val in et.items(): + r_val = rt[key] + assert e_val == r_val or np.isclose(e_val, r_val) diff --git a/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/test_calibration_belt.py b/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/test_calibration_belt.py index 18a85da9455021e5d3aa8d065f18832ff8111002..6b8c5e4a05572ac05081e167ed278b4fd410c6dc 100644 --- a/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/test_calibration_belt.py +++ 
b/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/test_calibration_belt.py @@ -24,7 +24,7 @@ def test_calibrationbelt_algorithm_local(test_input, expected): "test_input, expected", get_test_params(expected_file, slice(28, 33)) ) def test_calibrationbelt_algorithm_federated(test_input, expected): - result = get_algorithm_result(CalibrationBelt, test_input, num_workers=2) + result = get_algorithm_result(CalibrationBelt, test_input, num_workers=5) expected = expected[0] assert int(result["n_obs"]) == int(expected["n_obs"]) diff --git a/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/test_logistic_regression.py b/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/test_logistic_regression.py index 502f5bdfa2367852a2055c7f6427fc8556c34f8b..92758aa68dff4b4b57065cb8706116abf07896aa 100644 --- a/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/test_logistic_regression.py +++ b/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/test_logistic_regression.py @@ -14,10 +14,12 @@ expected_file = Path(__file__).parent / "expected" / "logistic_regression_expect def test_logistic_regression_algorithm_local(test_input, expected): result = get_algorithm_result(LogisticRegression, test_input, num_workers=1) - assert ( - np.isclose(result["Coefficients"], expected["coeff"], rtol=1e-3).all() - or np.isclose(result["Coefficients"], expected["coeff"], atol=1e-3).all() - ) + # There is no way to choose which level will be the positive and which the + # negative level in sklearn's LogisticRegression. Sometimes the choice it + # makes agrees with our choice and sometimes it doesn't. In the latter case + # the coefficients lie on the same axis but have opposite orientation, + # hence we only check if the two results are collinear. + assert are_collinear(result["Coefficients"], expected["coeff"]) @pytest.mark.parametrize( @@ -26,7 +28,9 @@ def test_logistic_regression_algorithm_local(test_input, expected): def test_logistic_regression_algorithm_federated(test_input, expected): result = get_algorithm_result(LogisticRegression, test_input, 10) - assert ( - np.isclose(result["Coefficients"], expected["coeff"], rtol=1e-3).all() - or np.isclose(result["Coefficients"], expected["coeff"], atol=1e-3).all() - ) + assert are_collinear(result["Coefficients"], expected["coeff"]) + + +def are_collinear(u, v): + cosine_similarity = np.dot(v, u) / (np.sqrt(np.dot(v, v)) * np.sqrt(np.dot(u, u))) + return np.isclose(abs(cosine_similarity), 1, rtol=1e-5) diff --git a/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_missingDataset.py b/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_missingDataset.py index f1b6623f7a83f9035205fccd45c0f4949a50f811..25ac28bca747c20f244231049619e11810456116 100644 --- a/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_missingDataset.py +++ b/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_missingDataset.py @@ -30,7 +30,7 @@ def test_LINEAR_REGRESSION(): def check_result(result): assert ( result - == '{"result" : [{"data":"The value of the parameter \'dataset\' should not be blank. 
Algorithm: LINEAR_REGRESSION","type":"text/plain+error"}]}' + == '{"result" : [{"data":"The value of the parameter \'dataset\' should not be blank.","type":"text/plain+user_error"}]}' ) diff --git a/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_missingPathology.py b/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_missingPathology.py index e8e4f86edbc5cdef318d0e2b00831270534c1c15..5c75907b4397fe77ccbca6e9910cc8213bcf7b01 100644 --- a/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_missingPathology.py +++ b/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_missingPathology.py @@ -30,7 +30,7 @@ def test_LINEAR_REGRESSION(): def check_result(result): assert ( result - == '{"result" : [{"data":"The value of the parameter \'pathology\' should not be blank.","type":"text/plain+error"}]}' + == '{"result" : [{"data":"Please provide a pathology.","type":"text/plain+user_error"}]}' ) diff --git a/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_parameters.py b/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_parameters.py index a7c826e677a322c9f349c48f31f68f5cbed5436b..fbd534d60cfa216285b73d70200cd60000b3661c 100644 --- a/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_parameters.py +++ b/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_parameters.py @@ -8,6 +8,27 @@ sys.path.insert(0, "../") from tests import vm_url + +def test_parameter_modulo2(): + + endpointUrl1 = vm_url + "TTEST_PAIRED" + + logging.info("---------- TEST : t-test input parameters should throw a user error.") + data = [ + {"name": "y", "value": "lefthippocampus-righthippocampus,leftententorhinalarea"}, + {"name": "hypothesis", "value": "different"}, + {"name": "pathology", "value": "dementia"}, + {"name": "dataset", "value": "desd-synthdata"}, + {"name": "filter", "value": ""}, + ] + headers = {"Content-type": "application/json", "Accept": "text/plain"} + r = requests.post(endpointUrl1, data=json.dumps(data), headers=headers) + result = json.loads(r.text) + assert result["result"][0]["data"] == " The input should be in the form of y1-y2,y3-y4,.. Therefore the number of variables should be modulo 2 " + assert result["result"][0]["type"] == "text/plain+user_error" + + + def test_valueEnumerationsParameter(): endpointUrl1 = vm_url + "LINEAR_REGRESSION" @@ -27,7 +48,7 @@ def test_valueEnumerationsParameter(): result = json.loads(r.text) assert ( r.text - == '{"result" : [{"data":"The value \'abcd\' of the parameter \'encodingparameter\' is not included in the valueEnumerations [dummycoding, sumscoding, simplecoding] . Algorithm: LINEAR_REGRESSION","type":"text/plain+error"}]}' + == '{"result" : [{"data":"The value \'abcd\' of the parameter \'encodingparameter\' is not included in the valueEnumerations [dummycoding, sumscoding, simplecoding] .","type":"text/plain+user_error"}]}' ) @@ -52,7 +73,7 @@ def test_parameter_max_value(): assert ( r.text - == '{"result" : [{"data":"The value(s) of the parameter \'sstype\' should be less than 3.0 . 
Algorithm: ANOVA","type":"text/plain+error"}]}' + == '{"result" : [{"data":"The value(s) of the parameter \'sstype\' should be less than 3.0 .","type":"text/plain+user_error"}]}' ) diff --git a/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_privacy.py b/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_privacy.py index d1e81c98090ba091c3e118777523b59962a1f0b6..1872d3547de0a712a1e0c26043efa42b71fb3c80 100644 --- a/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_privacy.py +++ b/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_privacy.py @@ -32,12 +32,14 @@ from tests.algorithm_tests_with_privacy.test_ttest_paired import ( url_calibration = vm_url + "CALIBRATION_BELT" url_pearson = vm_url + "PEARSON_CORRELATION" url_logreg = vm_url + "LOGISTIC_REGRESSION" +url_anovaoneway = vm_url + "ANOVA_ONEWAY" def check_privacy_result(result): assert ( result - == '{"result" : [{"data":"The Experiment could not run with the input provided because there are insufficient data.","type":"text/plain+warning"}]}' + == '{"result" : [{"data":"The Experiment could not run with the input provided ' + 'because there are insufficient data.","type":"text/plain+warning"}]}' ) @@ -55,7 +57,22 @@ def test_ANOVA_Privacy(): headers = {"Content-type": "application/json", "Accept": "text/plain"} r = requests.post(url_anova, data=json.dumps(data), headers=headers) - result = json.loads(r.text) + check_privacy_result(r.text) + + +def test_ANOVA_ONEWAY_Privacy(): + logging.info("---------- TEST : Anova Oneway privacy test") + + data = [ + {"name": "x", "value": "alzheimerbroadcategory"}, + {"name": "y", "value": "rightmprgprecentralgyrusmedialsegment"}, + {"name": "pathology", "value": "dementia"}, + {"name": "dataset", "value": "adni_9rows"}, + {"name": "filter", "value": ""}, + ] + + headers = {"Content-type": "application/json", "Accept": "text/plain"} + r = requests.post(url_anovaoneway, data=json.dumps(data), headers=headers) check_privacy_result(r.text) @@ -72,7 +89,6 @@ def test_Histogram_Privacy(): headers = {"Content-type": "application/json", "Accept": "text/plain"} r = requests.post(url_hist, data=json.dumps(data), headers=headers) - result = json.loads(r.text) check_privacy_result(r.text) @@ -88,7 +104,10 @@ def test_ID3_Privacy(): {"name": "dataset", "value": "contact-lenses"}, { "name": "filter", - "value": '{"condition": "AND", "rules": [{"id": "CL_age", "field": "CL_age", "type": "string", "input": "text", "operator": "equal", "value": "Young"}], "valid": true}', + "value": '{"condition": "AND", "rules": ' + '[{"id": "CL_age", "field": "CL_age", "type": "string", ' + '"input": "text", "operator": "equal", "value": "Young"}], ' + '"valid": true}', }, {"name": "pathology", "value": "dementia"}, ] @@ -125,7 +144,6 @@ def test_KMEANS_Privacy(): ] headers = {"Content-type": "application/json", "Accept": "text/plain"} r = requests.post(url_kmeans, data=json.dumps(data), headers=headers) - result = json.loads(r.text) check_privacy_result(r.text) @@ -145,7 +163,6 @@ def test_LinearRegression_Privacy(): ] headers = {"Content-type": "application/json", "Accept": "text/plain"} r = requests.post(url_linreg, data=json.dumps(data), headers=headers) - result = json.loads(r.text) check_privacy_result(r.text) @@ -155,18 +172,21 @@ def test_LogisticRegression_Privacy(): data = [ { "name": "x", - "value": "leftententorhinalarea_logreg_test, rightententorhinalarea_logreg_test, lefthippocampus_logreg_test, righthippocampus_logreg_test", + "value": "leftententorhinalarea_logreg_test, " + 
"rightententorhinalarea_logreg_test, " + "lefthippocampus_logreg_test, " + "righthippocampus_logreg_test", }, {"name": "y", "value": "gender"}, {"name": "pathology", "value": "dementia"}, {"name": "dataset", "value": "adni_9rows"}, {"name": "filter", "value": ""}, + {"name": "positive_level", "value": "F"}, + {"name": "negative_level", "value": "M"}, ] headers = {"Content-type": "application/json", "Accept": "text/plain"} r = requests.post(url_logreg, data=json.dumps(data), headers=headers) - result = json.loads(r.text) - check_privacy_result(r.text) @@ -183,7 +203,6 @@ def test_Multiple_Histogram_Privacy(): headers = {"Content-type": "application/json", "Accept": "text/plain"} r = requests.post(url_multi_hist, data=json.dumps(data), headers=headers) - result = json.loads(r.text) check_privacy_result(r.text) @@ -203,7 +222,6 @@ def test_NAIVEBAYES_privacy(): headers = {"Content-type": "application/json", "Accept": "text/plain"} r = requests.post(url1, data=json.dumps(data1), headers=headers) - result1 = json.loads(r.text) check_privacy_result(r.text) @@ -222,7 +240,6 @@ def test_NaiveBayesStandalone_Privacy(): r = requests.post( url_naive_bayes_standalone, data=json.dumps(data), headers=headers ) - result = json.loads(r.text) check_privacy_result(r.text) @@ -240,7 +257,6 @@ def test_PearsonCorrlation_Privacy(): headers = {"Content-type": "application/json", "Accept": "text/plain"} r = requests.post(url_pearson, data=json.dumps(data), headers=headers) - result = json.loads(r.text) check_privacy_result(r.text) @@ -257,7 +273,6 @@ def test_UnpairedTtest__Privacy(): ] headers = {"Content-type": "application/json", "Accept": "text/plain"} r = requests.post(url_ttest_indep, data=json.dumps(data), headers=headers) - result = json.loads(r.text) check_privacy_result(r.text) @@ -273,7 +288,6 @@ def test_onesamplettest_Privacy(): ] headers = {"Content-type": "application/json", "Accept": "text/plain"} r = requests.post(url_ttest_onesample, data=json.dumps(data), headers=headers) - result = json.loads(r.text) check_privacy_result(r.text) @@ -288,7 +302,6 @@ def test_pairedttest_Privacy(): ] headers = {"Content-type": "application/json", "Accept": "text/plain"} r = requests.post(url_ttest_paired, data=json.dumps(data), headers=headers) - result = json.loads(r.text) check_privacy_result(r.text) @@ -311,7 +324,6 @@ def test_calibration_Privacy(): ] headers = {"Content-type": "application/json", "Accept": "text/plain"} r = requests.post(url_calibration, data=json.dumps(data), headers=headers) - result = json.loads(r.text) check_privacy_result(r.text) diff --git a/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_wrongDataset.py b/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_wrongDataset.py index 376a5f01df1e949132e03a370359ccda9154c902..ece5dbfe1c6225616cccbd94adf96508e4c7ff90 100644 --- a/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_wrongDataset.py +++ b/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_wrongDataset.py @@ -30,7 +30,7 @@ def test_LINEAR_REGRESSION(): def check_result(result): assert ( result - == '{"result" : [{"data":"Dataset(s) demo1 not found for pathology dementia!","type":"text/plain+user_error"}]}' + == '{"result" : [{"data":"Dataset(s) demo1 does not exist in pathology dementia.","type":"text/plain+user_error"}]}' ) diff --git a/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_wrongPathology.py b/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_wrongPathology.py index 
d24a3a424e50b595f5ddfd4c04dabc8f7c9abb1f..9b7892334ed62375bbc05bbebf086144aec4cbe6 100644 --- a/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_wrongPathology.py +++ b/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_wrongPathology.py @@ -30,7 +30,7 @@ def test_LINEAR_REGRESSION(): def check_result(result): assert ( result - == '{"result" : [{"data":"Pathology pathology1 not found!","type":"text/plain+user_error"}]}' + == '{"result" : [{"data":"Pathology pathology1 is not available.","type":"text/plain+user_error"}]}' ) diff --git a/Exareme-Docker/src/mip-algorithms/tests/integration_tests/test_exareme_integration_anova.py b/Exareme-Docker/src/mip-algorithms/tests/integration_tests/test_exareme_integration_anova.py new file mode 100644 index 0000000000000000000000000000000000000000..63b052b4ae87c117f8f4e0642a5abf9d11070ff9 --- /dev/null +++ b/Exareme-Docker/src/mip-algorithms/tests/integration_tests/test_exareme_integration_anova.py @@ -0,0 +1,33 @@ +import json + +import numpy as np +import pytest +import requests +from mipframework.testutils import get_test_params +from tests import vm_url +from tests.algorithm_tests.test_anova import expected_file + +headers = {"Content-type": "application/json", "Accept": "text/plain"} +url = vm_url + "ANOVA_ONEWAY" + + +@pytest.mark.parametrize( + "test_input, expected", get_test_params(expected_file, slice(95, 100)) +) +def test_anovaoneway_algorithm_exareme(test_input, expected): + result = requests.post(url, data=json.dumps(test_input), headers=headers) + result = json.loads(result.text) + result = result["result"][0]["data"] + + aov = result["anova_table"] + tukey = result["tukey_table"] + e_aov = {k: v for k, v in expected.items() if k != "tukey_test"} + e_tukey = expected["tukey_test"] + assert set(e_aov) == set(aov.keys()) + for key, e_val in e_aov.items(): + r_val = aov[key] + assert np.isclose(e_val, r_val) + for et, rt in zip(e_tukey, tukey): + for key, e_val in et.items(): + r_val = rt[key] + assert e_val == r_val or np.isclose(e_val, r_val) diff --git a/Exareme-Docker/src/mip-algorithms/tests/integration_tests/test_exareme_integration_logistic_regression.py b/Exareme-Docker/src/mip-algorithms/tests/integration_tests/test_exareme_integration_logistic_regression.py index 9c273d908d2a986b527efa78ba3822304b8fd0d3..8b362e21936f7e26fb9c5126aac41c3cd4db1b7f 100644 --- a/Exareme-Docker/src/mip-algorithms/tests/integration_tests/test_exareme_integration_logistic_regression.py +++ b/Exareme-Docker/src/mip-algorithms/tests/integration_tests/test_exareme_integration_logistic_regression.py @@ -19,7 +19,9 @@ def test_logistic_regression_algorithm_exareme(test_input, expected): result = json.loads(result.text) result = result["result"][0]["data"] - assert ( - np.isclose(result["Coefficients"], expected["coeff"], rtol=1e-3).all() - or np.isclose(result["Coefficients"], expected["coeff"], atol=1e-3).all() - ) + assert are_collinear(result["Coefficients"], expected["coeff"]) + + +def are_collinear(u, v): + cosine_similarity = np.dot(v, u) / (np.sqrt(np.dot(v, v)) * np.sqrt(np.dot(u, u))) + return np.isclose(abs(cosine_similarity), 1, rtol=1e-5) diff --git a/Exareme-Docker/src/mip-algorithms/utils/algorithm_utils.py b/Exareme-Docker/src/mip-algorithms/utils/algorithm_utils.py index 4df7c3d4b16ade0d78cf482873889d5240de9e60..674524d7a51859aa91d7a3901c9eac91563492b5 100644 --- a/Exareme-Docker/src/mip-algorithms/utils/algorithm_utils.py +++ b/Exareme-Docker/src/mip-algorithms/utils/algorithm_utils.py @@ -1,555 +1,564 @@ -from __future__ 
import division -from __future__ import print_function -from __future__ import unicode_literals - -import codecs -import errno -import json -import logging -import os -import pickle -import sqlite3 -from argparse import ArgumentParser -from collections import OrderedDict -import warnings - -import numpy as np -import pandas as pd -from patsy import dmatrix, dmatrices - -if "ENVIRONMENT_TYPE" in os.environ: - env_type = os.environ["ENVIRONMENT_TYPE"] - if env_type in {"DEV", "PROD"}: - PRIVACY_MAGIC_NUMBER = 10 - elif env_type == "TEST": - PRIVACY_MAGIC_NUMBER = 0 -else: - PRIVACY_MAGIC_NUMBER = 10 - -P_VALUE_CUTOFF = 0.001 -P_VALUE_CUTOFF_STR = "< " + str(P_VALUE_CUTOFF) - -warnings.filterwarnings("ignore") - - -class TransferAndAggregateData(object): - def __init__(self, **kwargs): - self.data = OrderedDict() - self.reduce_type = OrderedDict() - for k, v in kwargs.items(): - self.data[k] = v[0] - self.reduce_type[k] = v[1] - - def __repr__(self): - ret = "" - for k in self.data.keys(): - ret += "{k} : {val}, reduce by {red_type}\n".format( - k=k, val=self.data[k], red_type=self.reduce_type[k] - ) - return ret - - def __add__(self, other): - kwargs = OrderedDict() - for k in self.data.keys(): - if self.reduce_type[k] == "add": - kwargs[k] = (self.data[k] + other.data[k], "add") - elif self.reduce_type[k] == "max": - kwargs[k] = (max(self.data[k], other.data[k]), "max") - elif self.reduce_type[k] == "concat": - kwargs[k] = (np.concatenate(self.data[k], other.data[k]), "concat") - elif self.reduce_type[k] == "concatdict": - kwargs[k] = {} - for key in self.data[k].keys(): - kwargs[key] = np.concatenate(self.data[k][key], other.data[k][key]) - kwargs[k] = (kwargs[k], "concatdict") - elif self.reduce_type[k] == "do_nothing": - kwargs[k] = (self.data[k], "do_nothing") - else: - raise ValueError( - "{rt} is not implemented as a reduce method.".format( - rt=self.reduce_type[k] - ) - ) - return TransferAndAggregateData(**kwargs) - - @classmethod - def load(cls, inputDB): - conn = sqlite3.connect(inputDB) - cur = conn.cursor() - cur.execute("SELECT data FROM transfer") - first = True - result = None - for row in cur: - if first: - result = pickle.loads(codecs.decode(row[0], "utf-8")) - first = False - else: - result += pickle.loads(codecs.decode(row[0], "utf-8")) - return result - - def transfer(self): - print(codecs.encode(pickle.dumps(self), "utf-8")) - - def get_data(self): - return self.data - - -class TransferData: - def __add__(self, other): - raise NotImplementedError( - "The __add__ method should be implemented by the child class." 
- ) - - @classmethod - def load(cls, inputDB): - conn = sqlite3.connect(inputDB) - cur = conn.cursor() - cur.execute("SELECT data FROM transfer") - first = True - result = None - for row in cur: - if first: - result = pickle.loads(codecs.decode(row[0], "utf-8")) - first = False - else: - result += pickle.loads(codecs.decode(row[0], "utf-8")) - return result - - def transfer(self): - print(codecs.encode(pickle.dumps(self), "utf-8")) - - -def query_with_privacy(fname_db, query): - conn = sqlite3.connect(fname_db) - cur = conn.cursor() - cur.execute(query) - schema = [description[0] for description in cur.description] - data = cur.fetchall() - if len(data) < PRIVACY_MAGIC_NUMBER: - raise PrivacyError("Query results in illegal number of datapoints.") - return schema, data - - -def query_from_formula( - fname_db, - formula, - variables, - dataset, - query_filter, - data_table, - metadata_table, - metadata_code_column, - metadata_isCategorical_column, - no_intercept=False, - coding=None, -): - """ - Queries a database based on a list of variables and a patsy (R language) formula. Additionally performs privacy - check and returns results only if number of datapoints is sufficient. - - Parameters - ---------- - fname_db : string - Path and name of database. - formula : string or None - Formula in patsy (R language) syntax. E.g. 'y ~ x1 + x2 * x3'. If None a trivial formula of the form 'lhs ~ - rhs' is generated. - variables : tuple of list of strings - A tuple of the form (`lhs`, `rhs`) or (`rhs`,) where `lhs` and `rhs` are lists of the variable names. - dataset : string - A string of a list of datasets. - query_filter : string - TODO - data_table : string - The name of the data table in the database. - metadata_table : string - The name of the metagata table in the database. - metadata_code_column : string - The name of the code column in the metadata table in the database. - metadata_isCategorical_column : string - The name of the is_categorical column in the metadata table in the database. - no_intercept : bool - If no_intercept is True there is no intercept in the returned matrix(-ices). To use in the case where only a - rhs expression is needed, not a full formula. - coding : None or string - Specifies the coding scheme for categorical variables. Must be in {None, 'Treatment', 'Poly', 'Sum', 'Diff', - Helmert'}. - - Returns - ------- - (lhs_dm, rhs_dm) or rhs_dm : pandas.DataFrame objects - When a tilda is present in the formula, the function returns two design matrices (lhs_dm, rhs_dm). - When it is not the function returns just the rhs_dm. 
- """ - from numpy import log as log - from numpy import exp as exp - - _ = log( - exp(1) - ) # This line is needed to prevent import opimizer from removing above lines - - assert coding in {None, "Treatment", "Poly", "Sum", "Diff", "Helmert"} - dataset = dataset.replace(" ", "").split(",") - - # If no formula is given, generate a trivial one - if formula == "": - formula = "~".join(map(lambda x: "+".join(x), variables)) - variables = reduce(lambda a, b: a + b, variables) - - # Parse filter if given - if query_filter == "": - query_filter_clause = "" - else: - query_filter_clause = parse_filter(json.loads(query_filter)) - - if no_intercept: - formula += "-1" - conn = sqlite3.connect(fname_db) - - # Define query forming functions - def iscateg_query(var): - return "SELECT {is_cat} FROM {metadata} WHERE {code}=='{var}';".format( - is_cat=metadata_isCategorical_column, - metadata=metadata_table, - code=metadata_code_column, - var=var, - ) - - def count_query(varz): - return ( - "SELECT COUNT({var}) FROM {data} WHERE ({var_clause}) AND ({ds_clause})" - " {flt_clause};".format( - var=varz[0], - data=data_table, - var_clause=" AND ".join( - ["{v}!='' and {v} is not null".format(v=v) for v in varz] - ), - ds_clause=" OR ".join(["dataset=='{d}'".format(d=d) for d in dataset]), - flt_clause="" - if query_filter_clause == "" - else "AND ({flt_clause})".format(flt_clause=query_filter_clause), - ) - ) - - def data_query(varz, is_cat): - variables_casts = ", ".join( - [ - v if not c else "CAST({v} AS text) AS {v}".format(v=v) - for v, c in zip(varz, is_cat) - ] - ) - return ( - "SELECT {variables} FROM {data} WHERE ({var_clause}) AND ({ds_clause}) " - " {flt_clause};".format( - variables=variables_casts, - data=data_table, - var_clause=" AND ".join( - ["{v}!='' and {v} is not null".format(v=v) for v in varz] - ), - ds_clause=" OR ".join(["dataset=='{d}'".format(d=d) for d in dataset]), - flt_clause="" - if query_filter_clause == "" - else "AND ({flt_clause})".format(flt_clause=query_filter_clause), - ) - ) - - # Perform privacy check - if ( - pd.read_sql_query(sql=count_query(variables), con=conn).iat[0, 0] - < PRIVACY_MAGIC_NUMBER - ): - raise PrivacyError("Query results in illegal number of datapoints.") - # Pull is_categorical from metadata table - is_categorical = [ - pd.read_sql_query(sql=iscateg_query(v), con=conn).iat[0, 0] for v in variables - ] - if coding is not None: - for c, v in zip(is_categorical, variables): - if c: - formula = formula.replace( - v, "C({v}, {coding})".format(v=v, coding=coding) - ) - # Pull data from db and return design matrix(-ces) - data = pd.read_sql_query(sql=data_query(variables, is_categorical), con=conn) - if "~" in formula: - lhs_dm, rhs_dm = dmatrices(formula, data, return_type="dataframe") - return lhs_dm, rhs_dm - else: - rhs_dm = dmatrix(formula, data, return_type="dataframe") - return None, rhs_dm - - -def parse_filter(query_filter): - _operators = { - "equal": "=", - "not_equal": "!=", - "less": "<", - "greater": ">", - "between": "between", - "not_between": "not_between", - } - - def add_spaces(s): - return " " + s + " " - - def format_rule(rule): - id_ = rule["id"] - op = _operators[rule["operator"]] - val = rule["value"] - type_ = rule["type"] - if type_ == "string": - if type(val) == list: - val = ["'{v}'".format(v=v) for v in val] - else: - val = "'{v}'".format(v=val) - if op == "between": - return "{id} BETWEEN {val1} AND {val2}".format( - id=id_, op=op, val1=val[0], val2=val[1] - ) - elif op == "not_between": - return "{id} NOT BETWEEN {val1} AND 
{val2}".format( - id=id_, op=op, val1=val[0], val2=val[1] - ) - else: - return "{id}{op}{val}".format(id=id_, op=op, val=val) - - def format_group(group): - return "({group})".format(group=group) - - cond = query_filter["condition"] - rules = query_filter["rules"] - return add_spaces(cond).join( - [ - format_rule(rule=rule) - if "id" in rule - else format_group(group=parse_filter(rule)) - for rule in rules - ] - ) - - -def value_casting(value, type): - if type == "text": - return value - elif type == "real" or type == "int": - return float(value) - - -def variable_type(value): - if str(value) == "text": - return "S16" - elif str(value) == "real" or str(value) == "int": - return "float64" - - -def query_database(fname_db, queryData, queryMetadata): - # connect to database - conn = sqlite3.connect(fname_db) - cur = conn.cursor() - - cur.execute(queryData) - data = cur.fetchall() - if len(data) < PRIVACY_MAGIC_NUMBER: - raise PrivacyError("Query results in illegal number of datapoints.") - dataSchema = [description[0] for description in cur.description] - - cur.execute(queryMetadata) - metadata = cur.fetchall() - metadataSchema = [description[0] for description in cur.description] - conn.close() - - # Save data to pd.Dataframe - dataFrame = pd.DataFrame.from_records(data=data, columns=dataSchema) - - # Cast Dataframe based on metadata - metadataVarNames = [str(x) for x in list(zip(*metadata)[0])] - metadataTypes = [variable_type(x) for x in list(zip(*metadata)[2])] - for varName in dataSchema: - index = metadataVarNames.index(varName) - dataFrame[varName] = dataFrame[varName].astype(metadataTypes[index]) - - return dataSchema, metadataSchema, metadata, dataFrame - - -def variable_categorical_getDistinctValues(metadata): - distinctValues = dict() - dataTypes = zip( - (str(x) for x in list(zip(*metadata)[0])), - (str(x) for x in list(zip(*metadata)[1])), - ) - for md in metadata: - if md[3] == 1: # when variable is categorical - distinctValues[str(md[0])] = [ - value_casting(x, str(md[2])) for x in md[4].split(",") - ] - return distinctValues - - -class StateData(object): - def __init__(self, **kwargs): - self.data = kwargs - - def get_data(self): - return self.data - - def save(self, fname, pickle_protocol=2): - if not os.path.exists(os.path.dirname(fname)): - try: - os.makedirs(os.path.dirname(fname)) - except OSError as exc: # Guard against race condition - if exc.errno != errno.EEXIST: - raise - with open(fname, "wb") as f: - try: - pickle.dump(self, f, protocol=pickle_protocol) - except pickle.PicklingError: - print("Unpicklable object.") - - @classmethod - def load(cls, fname): - with open(fname, "rb") as f: - try: - obj = pickle.load(f) - except pickle.UnpicklingError: - print("Cannot unpickle.") - raise - return obj - - -def init_logger(): - if env_type == "PROD": - logging.basicConfig( - filename="/var/log/exaremePythonAlgorithms.log", level=logging.INFO - ) - else: - logging.basicConfig( - filename="/var/log/exaremePythonAlgorithms.log", level=logging.DEBUG - ) - - -class Global2Local_TD(TransferData): - def __init__(self, **kwargs): - self.data = kwargs - - def get_data(self): - return self.data - - -def set_algorithms_output_data(data): - print(data) - - -class PrivacyError(Exception): - def __init__(self, message): - super(PrivacyError, self).__init__(message) - - -class ExaremeError(Exception): - def __init__(self, message): - super(ExaremeError, self).__init__(message) - - -def make_json_raw(**kwargs): - return {k: v if type(v) != np.ndarray else v.tolist() for k, v in 
kwargs.items()}
-
-
-def parse_exareme_args(fp):
-    import json
-    from os import path
-
-    # Find properties.json and parse algorithm parameters
-    prop_path = path.abspath(fp)
-    while not path.isfile(prop_path + "/properties.json"):
-        prop_path = path.dirname(prop_path)
-    with open(prop_path + "/properties.json", "r") as prop:
-        params = json.load(prop)["parameters"]
-    parser = ArgumentParser()
-    # Add Exareme arguments
-    parser.add_argument("-input_local_DB", required=False, help="Path to local db.")
-    parser.add_argument(
-        "-db_query", required=False, help="Query to be executed on local db."
-    )
-    parser.add_argument(
-        "-cur_state_pkl",
-        required=False,
-        help="Path to the pickle file holding the current state.",
-    )
-    parser.add_argument(
-        "-prev_state_pkl",
-        required=False,
-        help="Path to the pickle file holding the previous state.",
-    )
-    parser.add_argument("-local_step_dbs", required=False, help="Path to local db.")
-    parser.add_argument(
-        "-global_step_db",
-        required=False,
-        help="Path to db holding global step results.",
-    )
-    parser.add_argument("-data_table", required=False)
-    parser.add_argument("-metadata_table", required=False)
-    parser.add_argument("-metadata_code_column", required=False)
-    parser.add_argument("-metadata_isCategorical_column", required=False)
-    # Add algorithm arguments
-    for p in params:
-        name = "-" + p["name"]
-        required = p["valueNotBlank"]
-        if name != "pathology":
-            parser.add_argument(name, required=required)
-
-    args, unknown = parser.parse_known_args()
-    return args
-
-
-def main():
-    fname_db = "/Users/zazon/madgik/mip_data/dementia/datasets.db"
-    lhs = ["lefthippocampus"]
-    rhs = ["alzheimerbroadcategory"]
-    variables = (lhs, rhs)
-    # formula = 'gender ~ alzheimerbroadcategory + lefthippocampus'
-    formula = ""
-    query_filter = """
-    {
-      "condition":"AND",
-      "rules":[
-         {
-            "id":"alzheimerbroadcategory",
-            "field":"alzheimerbroadcategory",
-            "type":"string",
-            "input":"select",
-            "operator":"not_equal",
-            "value":"Other"
-         },
-         {
-            "id":"alzheimerbroadcategory",
-            "field":"alzheimerbroadcategory",
-            "type":"string",
-            "input":"select",
-            "operator":"not_equal",
-            "value":"CN"
-         }
-      ],
-      "valid":true
-    }
-    """
-    Y, X = query_from_formula(
-        fname_db,
-        formula,
-        variables,
-        data_table="DATA",
-        dataset="adni, ppmi, edsd",
-        query_filter=query_filter,
-        metadata_table="METADATA",
-        metadata_code_column="code",
-        metadata_isCategorical_column="isCategorical",
-        no_intercept=True,
-        coding=None,
-    )
-    print(X.shape)
-    print(Y.shape)
-    print(X.design_info.column_names)
-    print(Y.design_info.column_names)
-    # print(Y)
-
-
-if __name__ == "__main__":
-    main()
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import codecs
+import errno
+import json
+import logging
+import os
+import pickle
+import sqlite3
+from argparse import ArgumentParser
+from collections import OrderedDict
+import warnings
+
+import numpy as np
+import pandas as pd
+from patsy import dmatrix, dmatrices
+
+# Default to PROD so that env_type and PRIVACY_MAGIC_NUMBER are always defined,
+# even when ENVIRONMENT_TYPE is missing or has an unexpected value.
+env_type = os.environ.get("ENVIRONMENT_TYPE", "PROD")
+if env_type == "TEST":
+    PRIVACY_MAGIC_NUMBER = 0
+else:  # DEV / PROD
+    PRIVACY_MAGIC_NUMBER = 10
+
+P_VALUE_CUTOFF = 0.001
+P_VALUE_CUTOFF_STR = "< " + str(P_VALUE_CUTOFF)
+
+warnings.filterwarnings("ignore")
+
+
+class TransferAndAggregateData(object):
+    def __init__(self, **kwargs):
+        self.data = OrderedDict()
+        self.reduce_type = OrderedDict()
+        for k, v in kwargs.items():
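+            # Each incoming value is a (data, reduce_type) pair; keep the two
+            # parts in parallel OrderedDicts so __add__ knows how to merge them.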
+            self.data[k] = v[0]
+            self.reduce_type[k] = v[1]
+
+    def __repr__(self):
+        ret = ""
+        for k in self.data.keys():
+            ret += "{k} : {val}, reduce by {red_type}\n".format(
+                k=k, val=self.data[k], red_type=self.reduce_type[k]
+            )
+        return ret
+
+    def __add__(self, other):
+        kwargs = OrderedDict()
+        for k in self.data.keys():
+            if self.reduce_type[k] == "add":
+                kwargs[k] = (self.data[k] + other.data[k], "add")
+            elif self.reduce_type[k] == "max":
+                kwargs[k] = (max(self.data[k], other.data[k]), "max")
+            elif self.reduce_type[k] == "concat":
+                # np.concatenate expects a sequence of arrays
+                kwargs[k] = (np.concatenate((self.data[k], other.data[k])), "concat")
+            elif self.reduce_type[k] == "concatdict":
+                kwargs[k] = {}
+                for key in self.data[k].keys():
+                    kwargs[k][key] = np.concatenate(
+                        (self.data[k][key], other.data[k][key])
+                    )
+                kwargs[k] = (kwargs[k], "concatdict")
+            elif self.reduce_type[k] == "do_nothing":
+                kwargs[k] = (self.data[k], "do_nothing")
+            else:
+                raise ValueError(
+                    "{rt} is not implemented as a reduce method.".format(
+                        rt=self.reduce_type[k]
+                    )
+                )
+        return TransferAndAggregateData(**kwargs)
+
+    @classmethod
+    def load(cls, inputDB):
+        conn = sqlite3.connect(inputDB)
+        cur = conn.cursor()
+        cur.execute("SELECT data FROM transfer")
+        first = True
+        result = None
+        for row in cur:
+            if first:
+                result = pickle.loads(codecs.decode(row[0], "utf-8"))
+                first = False
+            else:
+                result += pickle.loads(codecs.decode(row[0], "utf-8"))
+        return result
+
+    def transfer(self):
+        print(codecs.encode(pickle.dumps(self), "utf-8"))
+
+    def get_data(self):
+        return self.data
+
+
+class TransferData:
+    def __add__(self, other):
+        raise NotImplementedError(
+            "The __add__ method should be implemented by the child class."
+        )
+
+    @classmethod
+    def load(cls, inputDB):
+        conn = sqlite3.connect(inputDB)
+        cur = conn.cursor()
+        cur.execute("SELECT data FROM transfer")
+        first = True
+        result = None
+        for row in cur:
+            if first:
+                result = pickle.loads(codecs.decode(row[0], "utf-8"))
+                first = False
+            else:
+                result += pickle.loads(codecs.decode(row[0], "utf-8"))
+        return result
+
+    def transfer(self):
+        print(codecs.encode(pickle.dumps(self), "utf-8"))
+
+
+def query_with_privacy(fname_db, query):
+    conn = sqlite3.connect(fname_db)
+    cur = conn.cursor()
+    cur.execute(query)
+    schema = [description[0] for description in cur.description]
+    data = cur.fetchall()
+    if len(data) < PRIVACY_MAGIC_NUMBER:
+        raise PrivacyError("Query results in illegal number of datapoints.")
+    return schema, data
+
+
+def query_from_formula(
+    fname_db,
+    formula,
+    variables,
+    dataset,
+    query_filter,
+    data_table,
+    metadata_table,
+    metadata_code_column,
+    metadata_isCategorical_column,
+    no_intercept=False,
+    coding=None,
+):
+    """
+    Queries a database based on a list of variables and a patsy (R language)
+    formula. Additionally performs privacy check and returns results only if
+    number of datapoints is sufficient.
+
+    Parameters
+    ----------
+    fname_db : string
+        Path and name of database.
+    formula : string
+        Formula in patsy (R language) syntax. E.g. 'y ~ x1 + x2 * x3'. If
+        empty, a trivial formula of the form 'lhs ~ rhs' is generated.
+    variables : tuple of list of strings
+        A tuple of the form (`lhs`, `rhs`) or (`rhs`,) where `lhs` and `rhs`
+        are lists of the variable names.
+    dataset : string
+        A comma-separated string of dataset names.
+    query_filter : string
+        A filter in JSON syntax; parsed by `parse_filter`. See `main` below
+        for an example.
+    data_table : string
+        The name of the data table in the database.
+    metadata_table : string
+        The name of the metadata table in the database.
+    metadata_code_column : string
+        The name of the code column in the metadata table in the database.
+    metadata_isCategorical_column : string
+        The name of the is_categorical column in the metadata table in the
+        database.
+    no_intercept : bool
+        If no_intercept is True there is no intercept in the returned
+        matrix(-ices). To use in the case where only a rhs expression is
+        needed, not a full formula.
+    coding : None or string
+        Specifies the coding scheme for categorical variables. Must be in
+        {None, 'Treatment', 'Poly', 'Sum', 'Diff', 'Helmert'}.
+
+    Returns
+    -------
+    (lhs_dm, rhs_dm) or (None, rhs_dm) : pandas.DataFrame objects
+        When a tilde is present in the formula, the function returns two
+        design matrices (lhs_dm, rhs_dm). When it is not, the function
+        returns (None, rhs_dm).
+    """
+    from numpy import log as log
+    from numpy import exp as exp
+
+    _ = log(
+        exp(1)
+    )  # This line is needed to prevent the import optimizer from removing the above lines
+
+    assert coding in {None, "Treatment", "Poly", "Sum", "Diff", "Helmert"}
+    dataset = dataset.replace(" ", "").split(",")
+
+    # If no formula is given, generate a trivial one
+    if formula == "":
+        formula = "~".join(map(lambda x: "+".join(x), variables))
+        variables = reduce(lambda a, b: a + b, variables)
+
+    # Parse filter if given
+    if query_filter == "":
+        query_filter_clause = ""
+    else:
+        query_filter_clause = parse_filter(json.loads(query_filter))
+
+    if no_intercept:
+        formula += "-1"
+    conn = sqlite3.connect(fname_db)
+
+    # Define query forming functions
+    def iscateg_query(var):
+        return "SELECT {is_cat} FROM {metadata} WHERE {code}=='{var}';".format(
+            is_cat=metadata_isCategorical_column,
+            metadata=metadata_table,
+            code=metadata_code_column,
+            var=var,
+        )
+
+    def count_query(varz):
+        return (
+            "SELECT COUNT({var}) FROM {data} WHERE ({var_clause}) AND ({ds_clause})"
+            " {flt_clause};".format(
+                var=varz[0],
+                data=data_table,
+                var_clause=" AND ".join(
+                    ["{v}!='' and {v} is not null".format(v=v) for v in varz]
+                ),
+                ds_clause=" OR ".join(["dataset=='{d}'".format(d=d) for d in dataset]),
+                flt_clause=""
+                if query_filter_clause == ""
+                else "AND ({flt_clause})".format(flt_clause=query_filter_clause),
+            )
+        )
+
+    def data_query(varz, is_cat):
+        variables_casts = ", ".join(
+            [
+                v if not c else "CAST({v} AS text) AS {v}".format(v=v)
+                for v, c in zip(varz, is_cat)
+            ]
+        )
+        return (
+            "SELECT {variables} FROM {data} WHERE ({var_clause}) AND ({ds_clause}) "
+            " {flt_clause};".format(
+                variables=variables_casts,
+                data=data_table,
+                var_clause=" AND ".join(
+                    ["{v}!='' and {v} is not null".format(v=v) for v in varz]
+                ),
+                ds_clause=" OR ".join(["dataset=='{d}'".format(d=d) for d in dataset]),
+                flt_clause=""
+                if query_filter_clause == ""
+                else "AND ({flt_clause})".format(flt_clause=query_filter_clause),
+            )
+        )
+
+    # Perform privacy check
+    if (
+        pd.read_sql_query(sql=count_query(variables), con=conn).iat[0, 0]
+        < PRIVACY_MAGIC_NUMBER
+    ):
+        raise PrivacyError("Query results in illegal number of datapoints.")
+    # Pull is_categorical from metadata table
+    is_categorical = [
+        pd.read_sql_query(sql=iscateg_query(v), con=conn).iat[0, 0] for v in variables
+    ]
+    if coding is not None:
+        for c, v in zip(is_categorical, variables):
+            if c:
+                formula = formula.replace(
+                    v, "C({v}, {coding})".format(v=v, coding=coding)
+                )
+    # Pull data from db and return design matrix(-ces)
+    data = pd.read_sql_query(sql=data_query(variables, is_categorical), con=conn)
+    if "~" in formula:
+        lhs_dm, rhs_dm = dmatrices(formula, data, return_type="dataframe")
+        return lhs_dm, rhs_dm
+    else:
+        rhs_dm = dmatrix(formula, data, return_type="dataframe")
+        return None, rhs_dm
+
+
+def parse_filter(query_filter):
+    _operators = {
+        "equal": "=",
+        "not_equal": "!=",
+        "less": "<",
+        "greater": ">",
+        "between": "between",
+        "not_between": "not_between",
+    }
+
+    def add_spaces(s):
+        return " " + s + " "
+
+    def format_rule(rule):
+        id_ = rule["id"]
+        op = _operators[rule["operator"]]
+        val = rule["value"]
+        type_ = rule["type"]
+        if type_ == "string":
+            if type(val) == list:
+                val = ["'{v}'".format(v=v) for v in val]
+            else:
+                val = "'{v}'".format(v=val)
+        if op == "between":
+            return "{id} BETWEEN {val1} AND {val2}".format(
+                id=id_, val1=val[0], val2=val[1]
+            )
+        elif op == "not_between":
+            return "{id} NOT BETWEEN {val1} AND {val2}".format(
+                id=id_, val1=val[0], val2=val[1]
+            )
+        else:
+            return "{id}{op}{val}".format(id=id_, op=op, val=val)
+
+    def format_group(group):
+        return "({group})".format(group=group)
+
+    cond = query_filter["condition"]
+    rules = query_filter["rules"]
+    return add_spaces(cond).join(
+        [
+            format_rule(rule=rule)
+            if "id" in rule
+            else format_group(group=parse_filter(rule))
+            for rule in rules
+        ]
+    )
+
+
+def value_casting(value, type):
+    if type == "text":
+        return value
+    elif type == "real" or type == "int":
+        return float(value)
+
+
+def variable_type(value):
+    if str(value) == "text":
+        return "S16"
+    elif str(value) == "real" or str(value) == "int":
+        return "float64"
+
+
+def query_database(fname_db, queryData, queryMetadata):
+    # connect to database
+    conn = sqlite3.connect(fname_db)
+    cur = conn.cursor()
+
+    cur.execute(queryData)
+    data = cur.fetchall()
+    # if len(data) < PRIVACY_MAGIC_NUMBER:
+    #     raise PrivacyError("Query results in illegal number of datapoints.")
+    dataSchema = [description[0] for description in cur.description]
+
+    cur.execute(queryMetadata)
+    metadata = cur.fetchall()
+    metadataSchema = [description[0] for description in cur.description]
+    conn.close()
+
+    # Save data to pd.DataFrame
+    dataFrame = pd.DataFrame.from_records(data=data, columns=dataSchema)
+
+    # Check privacy.
+ df = dataFrame.dropna() + if len(df) < PRIVACY_MAGIC_NUMBER: + raise PrivacyError("Query results in illegal number of datapoints.") + + # Cast Dataframe based on metadata + metadataVarNames = [str(x) for x in list(zip(*metadata)[0])] + metadataTypes = [variable_type(x) for x in list(zip(*metadata)[2])] + for varName in dataSchema: + index = metadataVarNames.index(varName) + dataFrame[varName] = dataFrame[varName].astype(metadataTypes[index]) + + return dataSchema, metadataSchema, metadata, dataFrame + + +def variable_categorical_getDistinctValues(metadata): + distinctValues = dict() + dataTypes = zip( + (str(x) for x in list(zip(*metadata)[0])), + (str(x) for x in list(zip(*metadata)[1])), + ) + for md in metadata: + if md[3] == 1: # when variable is categorical + distinctValues[str(md[0])] = [ + value_casting(x, str(md[2])) for x in md[4].split(",") + ] + return distinctValues + + +class StateData(object): + def __init__(self, **kwargs): + self.data = kwargs + + def get_data(self): + return self.data + + def save(self, fname, pickle_protocol=2): + if not os.path.exists(os.path.dirname(fname)): + try: + os.makedirs(os.path.dirname(fname)) + except OSError as exc: # Guard against race condition + if exc.errno != errno.EEXIST: + raise + with open(fname, "wb") as f: + try: + pickle.dump(self, f, protocol=pickle_protocol) + except pickle.PicklingError: + print("Unpicklable object.") + + @classmethod + def load(cls, fname): + with open(fname, "rb") as f: + try: + obj = pickle.load(f) + except pickle.UnpicklingError: + print("Cannot unpickle.") + raise + return obj + + +def init_logger(): + if env_type == "PROD": + logging.basicConfig( + filename="/var/log/exaremePythonAlgorithms.log", level=logging.INFO + ) + else: + logging.basicConfig( + filename="/var/log/exaremePythonAlgorithms.log", level=logging.DEBUG + ) + + +class Global2Local_TD(TransferData): + def __init__(self, **kwargs): + self.data = kwargs + + def get_data(self): + return self.data + + +def set_algorithms_output_data(data): + print(data) + + +class PrivacyError(Exception): + def __init__(self, message): + super(PrivacyError, self).__init__(message) + + +class ExaremeError(Exception): + def __init__(self, message): + super(ExaremeError, self).__init__(message) + + +def make_json_raw(**kwargs): + return {k: v if type(v) != np.ndarray else v.tolist() for k, v in kwargs.items()} + + +def parse_exareme_args(fp): + import json + from os import path + + # Find properties.json and parse algorithm parameters + prop_path = path.abspath(fp) + while not path.isfile(prop_path + "/properties.json"): + prop_path = path.dirname(prop_path) + with open(prop_path + "/properties.json", "r") as prop: + params = json.load(prop)["parameters"] + parser = ArgumentParser() + # Add Exareme arguments + parser.add_argument("-input_local_DB", required=False, help="Path to local db.") + parser.add_argument( + "-db_query", required=False, help="Query to be executed on local db." 
+    )
+    parser.add_argument(
+        "-cur_state_pkl",
+        required=False,
+        help="Path to the pickle file holding the current state.",
+    )
+    parser.add_argument(
+        "-prev_state_pkl",
+        required=False,
+        help="Path to the pickle file holding the previous state.",
+    )
+    parser.add_argument("-local_step_dbs", required=False, help="Path to local db.")
+    parser.add_argument(
+        "-global_step_db",
+        required=False,
+        help="Path to db holding global step results.",
+    )
+    parser.add_argument("-data_table", required=False)
+    parser.add_argument("-metadata_table", required=False)
+    parser.add_argument("-metadata_code_column", required=False)
+    parser.add_argument("-metadata_isCategorical_column", required=False)
+    # Add algorithm arguments (note: p["name"] carries no leading dash)
+    for p in params:
+        name = "-" + p["name"]
+        required = p["valueNotBlank"]
+        if p["name"] != "pathology":
+            parser.add_argument(name, required=required)
+
+    args, unknown = parser.parse_known_args()
+    return args
+
+
+def main():
+    fname_db = "/Users/zazon/madgik/mip_data/dementia/datasets.db"
+    lhs = ["lefthippocampus"]
+    rhs = ["alzheimerbroadcategory"]
+    variables = (lhs, rhs)
+    # formula = 'gender ~ alzheimerbroadcategory + lefthippocampus'
+    formula = ""
+    query_filter = """
+    {
+      "condition":"AND",
+      "rules":[
+         {
+            "id":"alzheimerbroadcategory",
+            "field":"alzheimerbroadcategory",
+            "type":"string",
+            "input":"select",
+            "operator":"not_equal",
+            "value":"Other"
+         },
+         {
+            "id":"alzheimerbroadcategory",
+            "field":"alzheimerbroadcategory",
+            "type":"string",
+            "input":"select",
+            "operator":"not_equal",
+            "value":"CN"
+         }
+      ],
+      "valid":true
+    }
+    """
+    Y, X = query_from_formula(
+        fname_db,
+        formula,
+        variables,
+        data_table="DATA",
+        dataset="adni, ppmi, edsd",
+        query_filter=query_filter,
+        metadata_table="METADATA",
+        metadata_code_column="code",
+        metadata_isCategorical_column="isCategorical",
+        no_intercept=True,
+        coding=None,
+    )
+    print(X.shape)
+    print(Y.shape)
+    print(X.design_info.column_names)
+    print(Y.design_info.column_names)
+    # print(Y)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Federated-Deployment/Compose-Files/docker-compose-master.yml b/Federated-Deployment/Compose-Files/docker-compose-master.yml
index bf914bd29c626baf9051588c83442d11dac00e26..eb0a74ad2ee3f5ceb3f0fc09bc3329f0842f40ff 100644
--- a/Federated-Deployment/Compose-Files/docker-compose-master.yml
+++ b/Federated-Deployment/Compose-Files/docker-compose-master.yml
@@ -25,17 +25,15 @@ networks:
 services:
   exareme-keystore:
-    image: ${CONSUL_IMAGE}:${CONSUL_VERSION}
-    image: progrium/consul
-    command:
-      - -server
-      - -bootstrap
+    image: bitnami/consul:1.8.3
+    #ports: # Used for Debugging
+    #  - target: 8500
+    #    published: 8500
+    #    protocol: tcp
+    #    mode: host
+    environment:
+      - CONSUL_BIND_INTERFACE=lo
     deploy:
-      restart_policy:
-        condition: on-failure
-        delay: 5s
-        max_attempts: 3
-        window: 120s
       placement:
         constraints:
           - node.role == manager # Ensures we only start on manager nodes
@@ -52,14 +50,11 @@ services:
       - TEMP_FILES_CLEANUP_TIME=30
       - NODE_COMMUNICATION_TIMEOUT=30000 # (MILIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED
       - ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD
+      - LOG_LEVEL=INFO # INFO / DEBUG
+      - CONVERT_CSVS=FALSE # TRUE / FALSE
     depends_on:
       - exareme-keystore
     deploy:
-      restart_policy:
-        condition: on-failure
-        delay: 5s
-        max_attempts: 3
-        window: 120s
       placement:
         constraints:
           - node.role == manager # Ensures we only start on manager nodes
diff --git a/Federated-Deployment/Compose-Files/docker-compose-worker.yml 
b/Federated-Deployment/Compose-Files/docker-compose-worker.yml index 553a03d4dbb82c1e3aa6ba0222dd2a32c78c0eb5..e83d66e883363dd6cb5c4b7a283f378780950140 100644 --- a/Federated-Deployment/Compose-Files/docker-compose-worker.yml +++ b/Federated-Deployment/Compose-Files/docker-compose-worker.yml @@ -33,12 +33,9 @@ services: - TEMP_FILES_CLEANUP_TIME=30 - NODE_COMMUNICATION_TIMEOUT=30000 # (MILIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED - ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD + - LOG_LEVEL=INFO # INFO / DEBUG + - CONVERT_CSVS=FALSE # TRUE / FALSE deploy: - restart_policy: - condition: on-failure - delay: 5s - max_attempts: 3 - window: 120s placement: constraints: - node.role == worker # Ensures we only start on worker nodes diff --git a/Local-Deployment/.gitignore b/Local-Deployment/.gitignore deleted file mode 100644 index d7d14215a7d76cbe7bb4808dc7b494ce5d31812e..0000000000000000000000000000000000000000 --- a/Local-Deployment/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -exareme.yaml -data_path.txt -domain_name.txt -portainer/ diff --git a/Local-Deployment/README.md b/Local-Deployment/README.md index 177e14b7ea6a1297f634296eea19a7ac24a2d39e..47712eb2675a68af747c7a8826f036a6221a9c8d 100644 --- a/Local-Deployment/README.md +++ b/Local-Deployment/README.md @@ -4,25 +4,7 @@ Here you will find all the information needed in order to deploy Exareme in your # Requirement -1) Install Docker in your machine. - -2) Since the deployment of a local instance of Exareme involves the creation of a Docker Swarm, you need to make sure that the node has: - -- Static public IP -- Network configuration: - * TCP: ports 2377 and 7946 must be open and available - * UDP: ports 4789 and 7946 must be open and available - * IP protocol 50 (ESP) must be enabled - -# Ports - -Make sure the following ports are available: - -```9090: for accessing Exareme``` - -```(Optional): 8500 for accessing Consul Key Value Store``` - -```(Optional): 9000 for accessing Portainer.io``` +Install Docker and Docker-Compose on your machine. # Preparation @@ -31,7 +13,7 @@ Make sure the following ports are available: Clone this repository on your local computer so you can use it to deploy Exareme. ## Data Structure -In the node in which you will deploy Exareme there should be a *DATA FOLDER* which contains the *DATA* existing in that node. We will refer to the path leading to *DATA FOLDER* as ```data_path```. +In the node in which you will deploy Exareme there should be a *DATA FOLDER* which contains the *DATA* existing in that node. The *DATA FOLDER* should follow a specific structure. It should contain one folder for each pathology that it has datasets for. Inside that folder there should be: @@ -63,41 +45,10 @@ For more information on what these files should contain you can see <a href="../ # Deployment -In the *Local-Deployment/* folder, run the ```deployLocal.sh``` to start the deployment. -You will be prompted to provide any information needed. - -## [Optional] Exareme Version -```This step can be done through the deploy script.``` - -In the ```Local-Deployment/``` folder create an ```exareme.yaml``` file. - -The file should contain the following lines, modify them depending on the version of Exareme you want to deploy. 
-
+Create a ```.env``` file in the *Local-Deployment* folder and add the following:
 ```
-EXAREME_IMAGE: "hbpmip/exareme"
-EXAREME_TAG: "v21.3.0"
+EXAREME_IMAGE=hbpmip/exareme:23.0.0
+DATA_FOLDER=/home/user/data
 ```
-## [Optional] Data path location
-```This step can be done through the deploy script.```
-
-In the ```Local-Deployment/``` folder create a ```data_path.txt``` file.
-
-The file should contain the following line, modify it according to the path where your *DATA FOLDER* is.
-
-```
-LOCAL_DATA_FOLDER=/home/user/data/
-```
-
-## [Optional] Secure Portainer
-
-By default, Portainer’s web interface and API are exposed over HTTP. If you want them to be exposed over HTTPS check
-<a href="../Documentation/SecurePortainer.md#optional-secure-portainer">here</a>. Information about how to <a href="../Federated-Deployment/Documentation/Troubleshoot.md#portainer">launch Portainer for the first time.</a>
-
-# Troubleshooting
-
-While ```sudo docker service ls```, if the services are Replicated 0/1:
-
-1) Check that you have enough space in your machine.
-
-2) If there is an ERROR, try ```sudo docker service ps --no-trunc NAME_OR_ID_OF_SERVICE``` to see the whole message.
+Then run ```docker-compose up``` to start the deployment. You can run ```docker-compose up -d``` to run it in the background.
diff --git a/Local-Deployment/deployLocal.sh b/Local-Deployment/deployLocal.sh
deleted file mode 100755
index dba4e0b4c2089d9c8cd0d45917a15d231e099460..0000000000000000000000000000000000000000
--- a/Local-Deployment/deployLocal.sh
+++ /dev/null
@@ -1,212 +0,0 @@
-#!/usr/bin/env bash
-# Key-Value Store
-EXAREME_KEYSTORE="exareme-keystore:8500"
-
-# Docker internal folder for the Exareme data
-DOCKER_DATA_FOLDER="/root/exareme/data/"
-
-# Portainer
-PORTAINER_PORT="9000"
-PORTAINER_IMAGE="portainer/portainer"
-PORTAINER_VERSION=":latest"
-PORTAINER_DATA=$(echo $PWD)"/portainer"
-PORTAINER_NAME="mip_portainer"
-
-FEDERATION_ROLE="master"
-
-PUBLIC_IP="127.0.0.1"
-LOCAL_IP="127.0.0.1"
-
-_get_public_ip(){
-    ip=$(wget http://ipinfo.io/ip -qO -)
-    if [ "$ip" != "" ]; then
-        PUBLIC_IP=$ip
-    fi
-}
-
-_get_local_ip(){
-    local iface=$(ip route show|awk '/^default via/ {print $NF" "$5}'|sort|head -1|cut -d ' ' -f 2)
-    local ip=$(ip address show|grep "inet.*$iface"|awk '{print $2}'|cut -d '/' -f1)
-    if [ "$ip" != "" ]; then
-        LOCAL_IP=$ip
-    fi
-}
-
-_get_public_ip
-_get_local_ip
-
-flag=0
-#Check if data_path exist
-if [[ -s data_path.txt ]]; then
-    :
-else
-    echo "What is the data_path for host machine?"
-    read answer
-    #Check that path ends with /
-    if [[ "${answer: -1}" != "/" ]]; then
-        answer=${answer}"/"
-    fi
-    echo LOCAL_DATA_FOLDER=${answer} > data_path.txt
-fi
-
-. ./data_path.txt
-
-#Run convert-csv-to-db
-chmod 775 ../Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py
-#Removing all previous .db files from the LOCAL_DATA_FOLDER
-echo "Starting the process of creating databases.."
-echo -e "\nDeleting previous databases."
-rm -rf ${LOCAL_DATA_FOLDER}/**/*.db
-
-echo -e "\nParsing csv files in " ${LOCAL_DATA_FOLDER} " to database files. "
-python ../Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py -f ${LOCAL_DATA_FOLDER} -t "master"
-#Get the status code from previous command
-py_script=$?
-#If status code != 0 an error has occurred
-if [[ ${py_script} -ne 0 ]]; then
-    echo -e "\nCreation of databases failed. Exareme will not run until fixes be made." 
>&2 - exit 1 -fi - -chmod 755 *.sh - -#Check if Exareme docker image exists in file -if [[ -s exareme.yaml ]]; then - : -else - . ./exareme.sh -fi - -#Previous Swarm not found -if [[ $(sudo docker info | grep Swarm | grep inactive*) != '' ]]; then - echo -e "\nInitialize Swarm.." - sudo docker swarm init --advertise-addr=${LOCAL_IP} -#Previous Swarm found -else - echo -e "\nLeaving previous Swarm.." - sudo docker swarm leave -f - sleep 1 - echo -e "\nInitialize Swarm.." - sudo docker swarm init --advertise-addr=${LOCAL_IP} -fi - -#Init network -if [[ $(sudo docker network ls | grep mip-local) == '' ]]; then - echo -e "\nInitialize Network" - sudo docker network create \ - --driver=overlay --opt encrypted --subnet=10.20.30.0/24 --ip-range=10.20.30.0/24 --gateway=10.20.30.254 mip-local -fi - -#Get hostname of node -name=$(hostname) -#. if hostname gives errors, replace with _ -name=${name//./_} - -#Get node Hostname -nodeHostname=$(sudo docker node ls --format {{.Hostname}}) - -echo -e "\nUpdate label name for Swarm node "${nodeHostname} -sudo docker node update --label-add name=${name} ${nodeHostname} -echo -e "\n" - -#Read image from file exareme.yaml -image="" -while read -r line ; do - if [[ ${line:0:1} == "#" ]] || [[ -z ${line} ]] ; then #comment line or empty line, continue - continue - fi - - image=$(echo ${image})$(echo "$line" | cut -d ':' -d ' ' -d '"' -f 2 -d '"')":" - -done < exareme.yaml - -#remove the last : from string -image=${image:0:-1} - -#imageName the first half of string image -imageName=$(echo "$image" | cut -d ':' -f 1) - -#tag the second half of string image -tag=$(echo "$image" | cut -d ':' -f 2 ) - -#Stack deploy -sudo env FEDERATION_NODE=${name} FEDERATION_ROLE=${FEDERATION_ROLE} EXAREME_IMAGE=${imageName}":"${tag} \ -EXAREME_KEYSTORE=${EXAREME_KEYSTORE} DOCKER_DATA_FOLDER=${DOCKER_DATA_FOLDER} \ -LOCAL_DATA_FOLDER=${LOCAL_DATA_FOLDER} \ -docker stack deploy -c docker-compose-master.yml ${name} - -#Portainer -echo -e "\nDo you wish to run Portainer? [ y/n ]" -read answer - -while true - -do - if [[ ${answer} == "y" ]]; then - echo -e "\nDo you wish to run Portainer in a Secure way? (SSL certificate required) [ y/n ]" - read answer - - while true - do - if [[ ${answer} == "y" ]];then - if [[ -s domain_name.txt ]]; then - . ./domain_name.txt - #Run Secure Portainer service - flag=1 - command=$(sudo find /etc/letsencrypt/live/${DOMAIN_NAME}/cert.pem 2> /dev/null) - if [[ ${command} == "/etc/letsencrypt/live/"${DOMAIN_NAME}"/cert.pem" ]]; then - . ./portainer.sh - else - echo -e "\nNo certificate for the Domain name: "${DOMAIN_NAME}" existing in file \"domain_name.txt\". Starting without Portainer.." - fi - else - echo -e "\nWhat is the Domain name for which an SSL certificate created?" - read answer - command=$(sudo find /etc/letsencrypt/live/${answer}/cert.pem 2> /dev/null) - - if [[ ${command} == "/etc/letsencrypt/live/"${answer}"/cert.pem" ]]; then - DOMAIN_NAME=${answer} - - #Optional to store Domain_name in a file - echo -e "\nDo you wish that Domain name to be stored so you will not be asked again? [y/n]" - read answer - while true - do - if [[ ${answer} == "y" ]]; then - echo "Storing information.." - echo DOMAIN_NAME=${DOMAIN_NAME} > domain_name.txt - break - elif [[ ${answer} == "n" ]]; then - echo "You will be asked again to provide the domain name.." - break - else - echo "$answer is not a valid answer! Try again.. [ y/n ]" - read answer - fi - done - - #Run Secure Portainer service - flag=1 - . 
./portainer.sh - else - echo -e "\nNo certificate for that Domain name: "${answer}". Starting without Portainer.." - fi - fi - break - elif [[ ${answer} == "n" ]]; then - flag=0 - . ./portainer.sh - break - else - echo ${answer}" is not a valid answer. Try again [ y/n ]" - read answer - fi - done - elif [[ ${answer} == "n" ]]; then - : - else - echo ${answer}" is not a valid answer. Try again [ y/n ]" - read answer - fi - break -done \ No newline at end of file diff --git a/Local-Deployment/docker-compose-master.yml b/Local-Deployment/docker-compose-master.yml deleted file mode 100644 index 8a1ba2554cbd9a7ff60ec2ab9999e1665d4101fc..0000000000000000000000000000000000000000 --- a/Local-Deployment/docker-compose-master.yml +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2016-2017 -# Data Intensive Applications and Systems Labaratory (DIAS) -# Ecole Polytechnique Federale de Lausanne -# -# All Rights Reserved. -# -# Permission to use, copy, modify and distribute this software and its -# documentation is hereby granted, provided that both the copyright notice -# and this permission notice appear in all copies of the software, derivative -# works or modified versions, and any portions thereof, and that both notices -# appear in supporting documentation. -# -# This code is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. THE AUTHORS AND ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE -# DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE -# USE OF THIS SOFTWARE. - -version: '3.2' - -networks: - net-local: - external: - name: mip-local - -services: - exareme-keystore: - image: progrium/consul - command: - - -server - - -bootstrap - deploy: - restart_policy: - condition: on-failure - delay: 5s - max_attempts: 3 - window: 120s - placement: - constraints: - - node.role == manager # Ensures we only start on manager nodes - - node.labels.name == ${FEDERATION_NODE} - networks: - - "net-local" # Connect the docker container to the global network - - exareme-master: - image: ${EXAREME_IMAGE} - environment: - - CONSULURL=${EXAREME_KEYSTORE} - - FEDERATION_ROLE=${FEDERATION_ROLE} - - NODE_NAME=${FEDERATION_NODE} - - TEMP_FILES_CLEANUP_TIME=30 - - NODE_COMMUNICATION_TIMEOUT=30000 # (MILIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED - - ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD - depends_on: - - exareme-keystore - deploy: - restart_policy: - condition: on-failure - delay: 5s - max_attempts: 3 - window: 120s - placement: - constraints: - - node.role == manager # Ensures we only start on manager nodes - - node.labels.name == ${FEDERATION_NODE} - ports: - - target: 9090 # So that we can access the Exareme REST API / interface - published: 9090 - protocol: tcp - mode: host - networks: - - "net-local" # Connect the docker container to the global network - volumes: - - ${LOCAL_DATA_FOLDER}:${DOCKER_DATA_FOLDER} diff --git a/Local-Deployment/docker-compose.yml b/Local-Deployment/docker-compose.yml new file mode 100644 index 0000000000000000000000000000000000000000..5d14439de7b0a2bb9c0584a86c2aeb7d32bcb86c --- /dev/null +++ b/Local-Deployment/docker-compose.yml @@ -0,0 +1,30 @@ +version: '3.2' + +services: + exareme_keystore: + image: bitnami/consul:1.8.3 + environment: + - CONSUL_AGENT_MODE=server + - CONSUL_BOOTSTRAP_EXPECT=1 + - CONSUL_CLIENT_LAN_ADDRESS=0.0.0.0 + - CONSUL_ENABLE_UI=true + restart: unless-stopped + + exareme_master: + image: ${EXAREME_IMAGE} + 
environment:
+      - CONSULURL=exareme_keystore:8500
+      - FEDERATION_ROLE=master
+      - NODE_NAME=miplocal
+      - TEMP_FILES_CLEANUP_TIME=30
+      - NODE_COMMUNICATION_TIMEOUT=30000 # (MILLIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED
+      - ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD
+      - LOG_LEVEL=INFO # INFO / DEBUG
+      - CONVERT_CSVS=TRUE # TRUE / FALSE
+    depends_on:
+      - exareme_keystore
+    ports:
+      - '9090:9090'
+    volumes:
+      - ${DATA_FOLDER}:/root/exareme/data/
+    restart: unless-stopped
\ No newline at end of file
diff --git a/Local-Deployment/exareme.sh b/Local-Deployment/exareme.sh
deleted file mode 100755
index fe3690e3ee29ec419ecb5a8773f8e67d32f3c859..0000000000000000000000000000000000000000
--- a/Local-Deployment/exareme.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/env bash
-
-#Check if EXAREME image exists
-docker_image_exists() {
-    curl -s -f https://index.docker.io/v1/repositories/$1/tags/$2 >/dev/null
-}
-
-echo -e "\nCreating file for Exareme image and Exareme tag.."
-
-while true
-do
-    echo -e "\nType your EXAREME image name:"
-    read name
-
-    echo -e "\nType your EXAREME image tag:"
-    read tag
-
-    echo -e "\nChecking if EXAREME image: "\"${name}":"${tag}\"" exists"
-
-    #docker image may exist in docker hub
-    if docker_image_exists ${name} ${tag}; then
-        echo "EXAREME Image exists. Continuing..."
-        #If image exists in DockerHub write it to file exareme.yaml
-        echo "EXAREME_IMAGE:" \"${name}\" >> exareme.yaml
-        echo "EXAREME_TAG:" \"${tag}\" >> exareme.yaml
-        break
-    #or locally..
-    elif [[ "$(sudo docker images -q ${name}:${tag} 2> /dev/null)" != "" ]]; then
-        echo "EXAREME Image exists. Continuing..."
-        #if image exists locally write it to file exareme.yaml
-        echo "EXAREME_IMAGE:" \"${name}\" >> exareme.yaml
-        echo "EXAREME_TAG:" \"${tag}\" >> exareme.yaml
-        break
-    else
-        echo -e "\nEXAREME image does not exist! EXAREME image name should have a format like: \"hbpmip/exareme\". And EXAREME image tag should have a format like: \"latest\""
-    fi
-done
-
-
-sleep 1
-return
\ No newline at end of file
diff --git a/Local-Deployment/portainer.sh b/Local-Deployment/portainer.sh
deleted file mode 100755
index 1a5360e5b9b7a51d37cc1dd011e2fe9909c856d9..0000000000000000000000000000000000000000
--- a/Local-Deployment/portainer.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env bash
-
-test -d ${PORTAINER_DATA} \
-    || sudo mkdir -p ${PORTAINER_DATA} \
-    || ( echo Failed to create ${PORTAINER_DATA}; exit 1 )
-
-
-echo -e "\nCreating a new instance of ${PORTAINER_NAME}.."
- -#Secure Portainer -if [[ ${flag} == "1" ]]; then - sudo docker service create \ - --publish mode=host,target=${PORTAINER_PORT},published=9000 \ - --constraint 'node.role == manager' \ - --detach=true --mount type=bind,src=/var/run/docker.sock,dst=/var/run/docker.sock \ - --mount type=bind,src=${PORTAINER_DATA},dst=/data \ - --mount type=bind,src=/etc/letsencrypt/live/${DOMAIN_NAME},dst=/certs/live/${DOMAIN_NAME} \ - --mount type=bind,src=/etc/letsencrypt/archive/${DOMAIN_NAME},dst=/certs/archive/${DOMAIN_NAME} \ - --name ${PORTAINER_NAME} ${PORTAINER_IMAGE}${PORTAINER_VERSION} \ - --ssl --sslcert /certs/live/${DOMAIN_NAME}/cert.pem --sslkey /certs/live/${DOMAIN_NAME}/privkey.pem - -#Non Secure -else - sudo docker service create \ - --publish mode=host,target=${PORTAINER_PORT},published=9000 \ - --constraint 'node.role == manager' \ - --detach=true --mount type=bind,src=/var/run/docker.sock,dst=/var/run/docker.sock \ - --mount type=bind,src=${PORTAINER_DATA},dst=/data \ - --name ${PORTAINER_NAME} ${PORTAINER_IMAGE}${PORTAINER_VERSION} -fi diff --git a/requirements.txt b/requirements.txt index 9d1beffb14c67d62fcb51a9c3b56b873de40d44a..5709812ae5956af439e66951b7c68e7dda558a09 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,4 @@ colour requests pytest pytest-xdist - +statsmodels==0.10.2
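For reference, a minimal sketch of how the reduce types of the new `TransferAndAggregateData` class are meant to combine partial results from two nodes. The class is the one added above; the variable names and values here are purely illustrative:

```
import numpy as np

# Two hypothetical local results; each value is a (data, reduce_type) pair.
local_a = TransferAndAggregateData(
    n_obs=(10, "add"),                  # summed across nodes
    max_age=(71.0, "max"),              # maximum across nodes
    ids=(np.array([1, 2]), "concat"),   # arrays concatenated across nodes
)
local_b = TransferAndAggregateData(
    n_obs=(7, "add"),
    max_age=(84.5, "max"),
    ids=(np.array([3]), "concat"),
)

merged = local_a + local_b
# merged.get_data() -> {"n_obs": 17, "max_age": 84.5, "ids": array([1, 2, 3])}
```

Because `__add__` re-wraps every merged value with its reduce type, the merge is associative and can be chained over any number of workers, which is exactly what `load` does when it folds the rows of the `transfer` table with `+=`.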