bibliography.bib

  @inproceedings{gankevich2010para,
    address = {Санкт-Петербург, Россия},
    author = {Ганкевич, И. Г.},
    booktitle = {Материалы пятой студенческой
                    научно-учебной конференции
                    ``Моделирование явлений в
                    технических и гуманитарных
                    науках'' (СНОО 2010)},
    language = {russian},
    pages = {67--68},
    title = {Параллельные вычисления с
                    использованием {OpenCL}},
    year = {2010},
    category = {hpc}
  }
  
  @article{gankevich2010srav,
    author = {Ганкевич, И. Г. and Дегтярев, А. Б. and
                    {Соэ Моэ Лвин}},
    journal = {Морские интеллектуальные
                    технологии},
    language = {russian},
    pages = {10--13},
    title = {Сравнение эффективности
                    применения {MPI} и {OpenCL} для
                    генерации волновой поверхности},
    volume = {4},
    year = {2010},
    category = {oceanwaves}
  }
  
  @inproceedings{degtyarev2011effi,
    address = {Yerevan, Armenia},
    author = {Degtyarev, A. and Gankevich, I.},
    booktitle = {Proceedings of 8th International Conference ``Computer
                    Science \& Information Technologies''},
    language = {english},
    pages = {248--251},
    title = {Efficiency Comparison of Wave Surface Generation Using
                    {OpenCL}, {OpenMP} and {MPI}},
    year = {2011},
    category = {oceanwaves}
  }
  
  @misc{bogdanov2012cpu,
    address = {Dubna, Russia},
    author = {Bogdanov, A. V. and Gankevich, I. G. and Gayduchok, V. Yu.
                    and {Pyae Sone Ko Ko}},
    booktitle = {Distributed Computing and Grid-Technologies in Science and
                    Education: Book of Abstr. of the 5th Intern. Conf.},
    language = {english},
    pages = {38},
    title = {{CPU} and {GPU} Consolidation Based on {OpenCL}},
    year = {2012},
    category = {hpc},
    tags = {wos}
  }
  
  @misc{bogdanov2012prac,
    address = {Dubna, Russia},
    author = {Bogdanov, A. V. and Gankevich, I. G.},
    booktitle = {Distributed Computing and Grid-Technologies in Science and
                    Education: Book of Abstr. of the 5th Intern. Conf.},
    language = {english},
    pages = {39},
    title = {Practical Efficiency of Optimizing Compilers in Parallel
                    Scientific Applications},
    year = {2012},
    category = {hpc},
    tags = {wos}
  }
  
  @misc{bogdanov2012virt,
    address = {Dubna, Russia},
    author = {Bogdanov, A. V. and Degtyarev, A. B. and Gaiduchok, V. Yu.
                    and Gankevich, I. G. and Zolotarev, V. I.},
    booktitle = {Distributed Computing and Grid-Technologies in Science and
                    Education: Book of Abstr. of the 5th Intern. Conf.},
    language = {english},
    pages = {33},
    title = {Virtual Workspace as a Basis of Supercomputer Center},
    year = {2012},
    category = {virtualisation},
    tags = {wos}
  }
  
  @inproceedings{degtyarev2012eval,
    address = {Athens, Greece},
    author = {Degtyarev, A. and Gankevich, I.},
    booktitle = {Proceedings of 11th International Conference ``Stability of
                    Ships and Ocean Vehicles''},
    language = {english},
    pages = {841--852},
    title = {Evaluation of hydrodynamic pressures for autoregression model
                    of irregular waves},
    year = {2012},
    category = {oceanwaves}
  }
  
  @article{degtiarev2013vychi,
    author = {Дегтярев, А. Б. and Ганкевич, И. Г.},
    journal = {Труды {XLV} НТК ``Проблемы
                    мореходных качеств судов,
                    корабельной гидромеханики и
                    освоения шельфа'' (Крыловские
                    чтения)},
    language = {russian},
    pages = {25--29},
    title = {Вычисление гидродинамических
                    давлений под реальной морской
                    поверхностью на основе
                    авторегрессионной модели
                    нерегулярного волнения},
    year = {2013},
    category = {oceanwaves}
  }
  
  @misc{degtyarev2013hydr,
    address = {Dubna, Russia},
    author = {Degtyarev, A. and Gankevich, I.},
    booktitle = {Book of Abstr. of the international conference ``Mathematical
                    Modeling and Computational Physics'' ({MMCP}'13)},
    language = {english},
    pages = {65},
    title = {Hydrodynamic pressure computation under real sea surface on
                    basis of autoregressive model of irregular waves},
    year = {2013},
    category = {oceanwaves}
  }
  
  @inproceedings{gankevich2013mic,
    address = {Saint Petersburg, Russia},
    author = {Gankevich, I. G.},
    booktitle = {Proceedings of the {XLIV} annual international conference
                    ``Control Processes and Stability'' ({CPS}'13)},
    language = {english},
    title = {{MIC} architecture performance benchmark in problem of
                    solving stochastic differential equations},
    year = {2013},
    category = {hpc}
  }
  
  @inproceedings{gankevich2013mode,
    address = {Yerevan, Armenia},
    author = {Gankevich, Ivan and Degtyarev, Alexander},
    booktitle = {Proceedings of {IX} International Conference on Computer
                    Science and Information Technologies ({CSIT}'2013)},
    language = {english},
    pages = {240--244},
    title = {Model of distributed computations in virtual testbed},
    year = {2013},
    category = {hpc}
  }
  
  @inproceedings{gankevich2013virt,
    author = {Gankevich, I. and Gaiduchok, V. and Gushchanskiy, D. and
                    Tipikin, Yu. and Korkhov, V. and Degtyarev, A. and Bogdanov,
                    A. and Zolotarev, V.},
    booktitle = {{IEEE} conference publication},
    language = {english},
    pages = {1--6},
    title = {Virtual private supercomputer: Design and evaluation},
    volume = {6710358},
    year = {2013},
    category = {virtualisation},
    tags = {scopus,wos}
  }
  
  @inproceedings{ishkulov2013tool,
    author = {Ishkulov, Ilarion and Degtyarev, Alexander and Gankevich,
                    Ivan and Gaiduchok, Vladimir and Tipikin, Yury and Korkhov,
                    Vladimir},
    booktitle = {International Conference on Computer Science and Information
                    Technologies},
    language = {english},
    pages = {298--301},
    title = {Tools for Modelling and Simulation Cloud Computing
                    Infrastructure},
    year = {2013},
    category = {virtualisation}
  }
  
  @inproceedings{degtyarev2014calc,
    author = {Degtyarev, A. and Gankevich, I.},
    booktitle = {14th International Ship Stability Workshop},
    language = {english},
    pages = {135--139},
    title = {Calculation Scheme for Wave Pressures with Autoregression
                    Method},
    year = {2014},
    category = {oceanwaves}
  }
  
  @misc{gaiduchok2014runn,
    address = {Dubna},
    author = {Gaiduchok, V. and Yuzhanin, N. and Gankevich, I. and
                    Bogdanov, A.},
    booktitle = {Book of abstracts of the 6th International Conference on
                    Distributed Computing and Grid Technologies in Science and
                    Education},
    language = {english},
    pages = {24},
    title = {Running Applications on a Hybrid Cluster},
    year = {2014},
    category = {hpc}
  }
  
  @misc{gankevich2014appl,
    address = {Dubna},
    author = {Gankevich, I. and Balyan, S. and Abrahamyan, S. and Korkhov,
                    V.},
    booktitle = {Book of abstracts of the 6th International Conference on
                    Distributed Computing and Grid Technologies in Science and
                    Education},
    language = {english},
    title = {Applications of on-demand virtual clusters to high
                    performance computing},
    year = {2014},
    category = {virtualisation}
  }
  
  @article{gankevich2014cons,
    author = {Gankevich, Ivan and Korkhov, Vladimir and Balyan, Serob and
                    Gaiduchok, Vladimir and Gushchanskiy, Dmitry and Tipikin, Yuri
                    and Degtyarev, Alexander and Bogdanov, Alexander},
    journal = {Lecture Notes in Computer Science},
    language = {english},
    pages = {341--354},
    title = {Constructing Virtual Private Supercomputer Using
                    Virtualization and Cloud Technologies},
    volume = {8584},
    year = {2014},
    category = {virtualisation},
    tags = {scopus,wos}
  }
  
  @misc{gankevich2014effi,
    address = {Dubna},
    author = {Gankevich, I. and Degtyarev, A.},
    booktitle = {Book of abstracts of the 6th International Conference on
                    Distributed Computing and Grid Technologies in Science and
                    Education},
    language = {english},
    pages = {25},
    title = {Efficient processing and classification of wave energy
                    spectrum data with a distributed pipeline},
    year = {2014},
    category = {hpc}
  }
  
  @article{gankevich2014meto,
    author = {Ганкевич, И. Г. and Дегтярев, А. Б.},
    journal = {Процессы управления и
                    устойчивость},
    language = {russian},
    number = {17},
    pages = {295--300},
    title = {Методы распределения нагрузки на
                    многопроцессорную систему},
    volume = {1},
    year = {2014},
    category = {hpc}
  }
  
  @article{gankevich2015spec,
    title = {Efficient processing and classification of wave energy
                    spectrum data with a distributed pipeline},
    author = {Ivan Gankevich and Alexander Degtyarev},
    journal = {Computer Research and Modeling},
    publisher = {Institute of Computer Science},
    volume = {7},
    number = {3},
    pages = {517--520},
    year = {2015},
    keywords = {distributed system, big data, data processing, parallel
                    computing},
    abstract = {Processing of large amounts of data often consists of several
                    steps, e.g. pre- and post-processing stages, which are
                    executed sequentially with data written to disk after each
                    step, however, when pre-processing stage for each task is
                    different the more efficient way of processing data is to
                    construct a pipeline which streams data from one stage to
                    another. In a more general case some processing stages can be
                    factored into several parallel subordinate stages thus forming
                    a distributed pipeline where each stage can have multiple
                    inputs and multiple outputs. Such processing pattern emerges
                    in a problem of classification of wave energy spectra based on
                    analytic approximations which can extract different wave
                    systems and their parameters (e.g. wave system type, mean wave
                    direction) from spectrum. Distributed pipeline approach
                    achieves good performance compared to conventional
                    “sequential-stage” processing. },
    url = {http://crm-en.ics.org.ru/journal/article/2301/},
    language = {english},
    category = {hpc},
    tags = {elibrary}
  }
  
  @inbook{bogdanov2015vsc,
    title = {Virtual Supercomputer as basis of Scientific Computing},
    author = {Alexander Bogdanov and Alexander Degtyarev and Vladimir
                    Korkhov and Vladimir Gaiduchok and Ivan Gankevich},
    editor = {Thomas S. Clary},
    year = {2015},
    chapter = {5},
    volume = {11},
    pages = {159--198},
    publisher = {Nova Science Publishers},
    booktitle = {Horizons in Computer Science Research},
    isbn = {978-1-63482-499-6},
    abstract = {Nowadays supercomputer centers strive to provide their
                    computational resources as services, however, present
                    infrastructure is not particularly suited for such a use.
                    First of all, there are standard application programming
                    interfaces to launch computational jobs via command line or a
                    web service, which work well for a program but turn out to be
                    too complex for scientists: they want applications to be
                    delivered to them from a remote server and prefer to interact
                    with them via graphical interface. Second, there are certain
                    applications which are dependent on older versions of
                    operating systems and libraries and it is either non-practical
                    to install those old systems on a cluster or there exists some
                    conflict between these dependencies. Virtualization
                    technologies can solve this problem, but they are not too
                    popular in scientific computing due to overheads introduced by
                    them. Finally, it is difficult to automatically estimate
                    optimal resource pool size for a particular task, thus it
                    often gets done manually by a user. If the large resource pool
                    is requested for a minor task, the efficiency degrades.
                    Moreover, cluster schedulers depend on estimated wall time to
                    execute the jobs and since it cannot be reliably predicted by
                    a human or a machine their efficiency suffers as well.
                    
                    Applications delivery, efficient operating system
                    virtualization and dynamic application resource pool size
                    defining constitute the two problems of scientific computing:
                    complex application interfaces and inefficient use of
                    resources available --- and virtual supercomputer is the way
                    to solve them. The research shows that there are ways to make
                    virtualization technologies efficient for scientific
                    computing: the use of lightweight application containers and
                    dynamic creation of these containers for a particular job are
                    both fast and transparent for a user. There are universal ways
                    to deliver application output to a front-end using execution
                    of a job on a cluster and presenting its results in a
                    graphical form. Finally, an application framework can be
                    developed to decompose parallel application into small
                    independent parts with easily predictable execution time, to
                    simplify scheduling via existing algorithms.
                    
                    The aim of this chapter is to promote the key idea of a
                    virtual supercomputer: to harness all available HPC resources
                    and provide users with convenient access to them. Such a
                    challenge can be effectively faced using contemporary
                    virtualization technologies. They can materialize the
                    long-term dream of having a supercomputer at your own desk. },
    language = {english},
    category = {virtualisation},
    tags = {scopus}
  }
  
  @article{bogdanov2015hybrid,
    title = {Running applications on a hybrid cluster},
    author = {Bogdanov, A. and Gankevich, I. and Gayduchok, V. and
                    Yuzhanin, N.},
    journal = {Computer Research and Modeling},
    publisher = {Institute of Computer Science},
    volume = {7},
    number = {3},
    pages = {475--483},
    year = {2015},
    keywords = {GPGPU, HPC, computational clusters, OpenFOAM, LINPACK,
                    ViennaCL, CUDA, OpenCL},
    abstract = {A hybrid cluster implies the use of computational devices
                    with radically different architectures. Usually, these are
                    conventional CPU architecture (e.g. {x86_64}) and GPU
                    architecture (e. g. NVIDIA CUDA). Creating and exploiting such
                    a cluster requires some experience: in order to harness all
                    computational power of the described system and get
                    substantial speedup for computational tasks many factors
                    should be taken into account. These factors consist of
                    hardware characteristics (e.g. network infrastructure, a type
                    of data storage, GPU architecture) as well as software stack
                    (e.g. MPI implementation, GPGPU libraries). So, in order to
                    run scientific applications GPU capabilities, software
                    features, task size and other factors should be considered.
                    
                    This report discusses opportunities and problems of hybrid
                    computations. Some statistics from tests programs and
                    applications runs will be demonstrated. The main focus of
                    interest is open source applications (e. g. OpenFOAM) that
                    support GPGPU (with some parts rewritten to use GPGPU directly
                    or by replacing libraries).
                    
                    There are several approaches to organize heterogeneous
                    computations for different GPU architectures out of which CUDA
                    library and OpenCL framework are compared. CUDA library is
                    becoming quite typical for hybrid systems with NVIDIA cards,
                    but OpenCL offers portability opportunities which can be a
                    determinant factor when choosing framework for development. We
                    also put emphasis on multi-GPU systems that are often used to
                    build hybrid clusters. Calculations were performed on a hybrid
                    cluster of SPbU computing center. },
    url = {http://crm-en.ics.org.ru/journal/article/2295/},
    language = {english},
    category = {coop},
    tags = {elibrary}
  }
  
  @article{gankevich2015virtcluster,
    title = {Applications of on-demand virtual clusters to high
                    performance computing},
    author = {Gankevich, I. and Balyan, S. and Abrahamyan, S. and Korkhov,
                    V.},
    journal = {Computer Research and Modeling},
    publisher = {Institute of Computer Science},
    volume = {7},
    number = {3},
    pages = {511--516},
    year = {2015},
    keywords = {virtual machine, lightweight virtualisation, application
                    containers},
    abstract = {Virtual machines are usually associated with an ability to
                    create them on demand by calling web services, then these
                    machines are used to deliver resident services to their
                    clients; however, providing clients with an ability to run an
                    arbitrary programme on the newly created machines is beyond
                    their power. Such kind of usage is useful in a high
                    performance computing environment where most of the resources
                    are consumed by batch programmes and not by daemons or
                    services. In this case a cluster of virtual machines is
                    created on demand to run a distributed or parallel programme
                    and to save its output to a network attached storage. Upon
                    completion this cluster is destroyed and resources are
                    released. With certain modifications this approach can be
                    extended to interactively deliver computational resources to
                    the user thus providing virtual desktop as a service.
                    Experiments show that the process of creating virtual clusters
                    on demand can be made efficient in both cases. },
    url = {http://crm-en.ics.org.ru/journal/article/2300/},
    language = {english},
    category = {virtualisation},
    tags = {elibrary}
  }
  
  @article{yuzhanin2015tasktrack,
    title = {Computational task tracking complex in the scientific project
                    informational support system},
    author = {Yuzhanin, N. and Tipikin, Yu. and Gankevich, I. and
                    Zolotarev, V.},
    journal = {Computer Research and Modeling},
    publisher = {Institute of Computer Science},
    volume = {7},
    number = {3},
    pages = {615--620},
    year = {2015},
    keywords = {service desk, task tracking, HPC, web service},
    abstract = {This work describes the idea of the system of informational
                    support for the scientific projects and the development of
                    computational task tracking complex. Due to large requirements
                    for computational experiments the problem of presentation of
                    the information about HPC tasks becomes one of the most
                    important. Nonstandard usage of the service desk system as a
                    basis of the computational task tracking and support system
                    can be the solution of this problem. Particular attention is
                    paid to the analysis and the satisfaction of the conflicting
                    requirements to the task tracking complex from the different
                    user groups. Besides the web service kit used for the
                    integration of the task tracking complex and the datacenter
                    environment is considered. This service kit became the main
                    interconnect between the parts of the scientific project
                    support system and also this kit allows to reconfigure the
                    whole system quickly and safely. },
    url = {http://crm-en.ics.org.ru/journal/article/2316/},
    language = {english},
    category = {coop},
    tags = {scopus}
  }
  
  @inproceedings{gankevich2015subord,
    author = {Gankevich, Ivan and Tipikin, Yuri and Gaiduchok, Vladimir},
    booktitle = {International Conference on High Performance Computing
                    \& Simulation (HPCS)},
    title = {Subordination: Cluster management without distributed
                    consensus},
    year = {2015},
    pages = {639--642},
    keywords = {Clustering algorithms;Computers;Heuristic algorithms;IP
                    networks;Network topology;Nominations and
                    elections;Topology;cluster accounting;cluster
                    management;cluster monitoring;job scheduling;leader election},
    doi = {10.1109/HPCSim.2015.7237106},
    abstract = {Nowadays, many cluster management systems rely on distributed
                    consensus algorithms to elect a leader that orchestrates
                    subordinate nodes. Contrary to these studies we propose
                    consensus-free algorithm that arranges cluster nodes into
                    multiple levels of subordination. The algorithm structures IP
                    address range of cluster network so that each node has ranked
                    list of candidates, from which it chooses a leader. The
                    results show that this approach easily scales to a large
                    number of nodes due to its asynchronous nature, and enables
                    fast recovery from node failures as they occur only on one
                    level of hierarchy. Multiple levels of subordination are
                    useful for efficiently collecting monitoring and accounting
                    data from large number of nodes, and for scheduling
                    general-purpose tasks on a cluster. },
    note = {Outstanding poster paper award.},
    language = {english},
    category = {hpc},
    tags = {scopus,wos}
  }
  
  @incollection{bogdanov2015sched,
    year = {2015},
    isbn = {978-3-319-21409-2},
    booktitle = {Computational Science and Its Applications -- ICCSA 2015},
    volume = {9158},
    series = {Lecture Notes in Computer Science},
    editor = {Gervasi, Osvaldo and Murgante, Beniamino and Misra, Sanjay
                    and Gavrilova, Marina L. and Rocha, Ana Maria Alves Coutinho
                    and Torre, Carmelo and Taniar, David and Apduhan, Bernady O.},
    doi = {10.1007/978-3-319-21410-8_23},
    title = {Profiling Scheduler for Efficient Resource Utilization},
    url = {http://dx.doi.org/10.1007/978-3-319-21410-8_23},
    publisher = {Springer International Publishing},
    keywords = {Computational cluster; Scheduler; HPC; Profiling; Resource
                    sharing; Load balancing; Networking},
    author = {Bogdanov, Alexander and Gaiduchok, Vladimir and Ahmed, Nabil
                    and Cubahiro, Amissi and Gankevich, Ivan},
    pages = {299--310},
    abstract = {Optimal resource utilization is one of the most important and
                    most challenging tasks for computational centers. A typical
                    contemporary center includes several clusters. These clusters
                    are used by many clients. So, administrators should set
                    resource sharing policies that will meet different
                    requirements of different groups of users. Users want to
                    compute their tasks fast while organizations want their
                    resources to be utilized efficiently. Traditional schedulers
                    do not allow administrator to efficiently solve these problems
                    in that way. Dynamic resource reallocation can improve the
                    efficiency of system utilization while profiling running
                    applications can generate important statistical data that can
                    be used in order to optimize future application usage. These
                    are basic advantages of a new scheduler that are discussed in
                    this paper. },
    language = {english},
    category = {coop},
    tags = {scopus,wos}
  }
  
  @inproceedings{yuzhanin2015asian,
    author = {Yuzhanin, Artur and Gankevich, Ivan and Stepanov, Eduard and
                    Korkhov, Vladimir},
    booktitle = {International Conference on High Performance Computing
                    \& Simulation (HPCS)},
    title = {Efficient Asian option pricing with CUDA},
    year = {2015},
    pages = {623--628},
    keywords = {Arrays;Graphics processing units;Instruction
                    sets;Kernel;Mathematical model;Pricing;Random variables},
    doi = {10.1109/HPCSim.2015.7237103},
    abstract = {In this paper the Monte Carlo methods of the Asian option
                    pricing are considered. Among them are pricing method with
                    path integral and partial differential equation. Simulation
                    algorithms running on the CPU sequentially and algorithms
                    running on the GPU in parallel using the CUDA technology were
                    analyzed and compared. },
    language = {english},
    category = {coop},
    tags = {scopus,wos}
  }
  
  @incollection{gankevich2015novel,
    year = {2015},
    isbn = {978-3-319-21409-2},
    booktitle = {Computational Science and Its Applications -- ICCSA 2015},
    volume = {9158},
    series = {Lecture Notes in Computer Science},
    editor = {Gervasi, Osvaldo and Murgante, Beniamino and Misra, Sanjay
                    and Gavrilova, Marina L. and Rocha, Ana Maria Alves Coutinho
                    and Torre, Carmelo and Taniar, David and Apduhan, Bernady O.},
    doi = {10.1007/978-3-319-21410-8_20},
    title = {Novel Approaches for Distributing Workload on Commodity
                    Computer Systems},
    url = {http://dx.doi.org/10.1007/978-3-319-21410-8_20},
    publisher = {Springer International Publishing},
    keywords = {Long-lived transactions; Distributed pipeline; Node
                    discovery; Software engineering; Distributed computing;
                    Cluster computing},
    author = {Gankevich, Ivan and Tipikin, Yuri and Degtyarev, Alexander
                    and Korkhov, Vladimir},
    pages = {259--271},
    language = {english},
    category = {hpc},
    tags = {scopus,wos}
  }
  
  @article{degtyarev2015hydr,
    author = {Degtyarev, A. and Gankevich, I.},
    journal = {Physics of Particles and Nuclei Letters},
    publisher = {Pleiades Publishing},
    language = {english},
    number = {3},
    pages = {389--391},
    title = {Hydrodynamic Pressure Computation under Real Sea Surface on
                    Basis of Autoregressive Model of Irregular Waves},
    volume = {12},
    year = {2015},
    doi = {10.1134/S1547477115030073},
    url = {http://dx.doi.org/10.1134/S1547477115030073},
    category = {oceanwaves},
    abstract = {Determining the impact of external excitations on a dynamic
                    marine object such as ship hull in a seaway is the main goal
                    of simulations. Now such simulations is most often based on
                    approximate mathematical models that use results of the theory
                    of small amplitude waves. The most complicated software for
                    marine objects behavior simulation LAMP IV uses numerical
                    solution of traditional hydrodynamic problem without often
                    used approximations but on the basis of theory of small
                    amplitude waves. For efficiency reasons these simulations can
                    be based on autoregressive model to generate real wave
                    surface. Such a surface possesses all the hydrodynamic
                    characteristics of sea waves, preserves dispersion relation
                    and also shows superior performance compared to other wind
                    wave models. Naturally, the known surface can be used to
                    compute velocity field and in turn to determine pressures in
                    any point under sea surface. The resulting computational
                    algorithm can be used to determine pressures without use of
                    theory of small-amplitude waves. },
    tags = {scopus,wos}
  }
  
  @inproceedings{gaiduchok2016network,
    editor = {Vladimir Korenkov and Tatiana Zaikina and Andrey
                    Nechaevskiy},
    author = {Bogdanov, A. and Gaiduchok, V. and Ahmed, N. and Ivanov, P.
                    and Gankevich, I.},
    title = {Improving networking performance of a Linux cluster},
    issn = {1613-0073},
    publisher = {CEUR-WS.org},
    series = {CEUR Workshop Proceedings},
    booktitle = {Proceedings of the 7th International Conference Distributed
                    Computing and Grid-technologies in Science and Education
                    2016},
    url = {http://ceur-ws.org/Vol-1787/},
    volume = {1787},
    year = {2016},
    language = {english},
    category = {coop},
    tags = {scopus}
  }
  
  @inbook{degtyarev2016balance,
    author = {Degtyarev, Alexander and Gankevich, Ivan},
    editor = {Gavrilova, Marina L. and Tan, C. J. Kenneth},
    title = {Balancing Load on a Multiprocessor System with Event-Driven
                    Approach},
    booktitle = {Transactions on Computational Science XXVII},
    year = {2016},
    publisher = {Springer Berlin Heidelberg},
    address = {Berlin, Heidelberg},
    pages = {35--52},
    isbn = {978-3-662-50412-3},
    doi = {10.1007/978-3-662-50412-3_3},
    url = {http://dx.doi.org/10.1007/978-3-662-50412-3_3},
    language = {english},
    category = {hpc},
    tags = {scopus}
  }
  
  @inbook{yuzhanin2016servicedesk,
    author = {Bogdanov, A. V. and Gaiduchok, V. Yu. and Gankevich, I. G.
                    and Tipikin, Yu. A. and Yuzhanin, N. V.},
    editor = {Gavrilova, Marina L. and Tan, C.J. Kenneth},
    title = {The Use of Service Desk System to Keep Track of Computational
                    Tasks on Supercomputers},
    booktitle = {Transactions on Computational Science XXVII},
    year = {2016},
    publisher = {Springer Berlin Heidelberg},
    address = {Berlin, Heidelberg},
    pages = {1--9},
    isbn = {978-3-662-50412-3},
    doi = {10.1007/978-3-662-50412-3_1},
    url = {http://dx.doi.org/10.1007/978-3-662-50412-3_1},
    language = {english},
    category = {hpc},
    tags = {scopus}
  }
  
  @inbook{gankevich2016master,
    author = {Gankevich, Ivan and Tipikin, Yuri and Korkhov, Vladimir and
                    Gaiduchok, Vladimir and Degtyarev, Alexander and Bogdanov,
                    Alexander},
    editor = {Gervasi, Osvaldo and Murgante, Beniamino and Misra, Sanjay
                    and Rocha, Ana Maria A.C. and Torre, Carmelo M. and Taniar,
                    David and Apduhan, Bernady O. and Stankova, Elena and Wang,
                    Shangguang},
    title = {Factory: Master Node High-Availability for Big Data
                    Applications and Beyond},
    booktitle = {Computational Science and Its Applications -- ICCSA 2016:
                    16th International Conference, Beijing, China, July 4-7, 2016,
                    Proceedings, Part II},
    year = {2016},
    publisher = {Springer International Publishing},
    pages = {379--389},
    isbn = {978-3-319-42108-7},
    doi = {10.1007/978-3-319-42108-7_29},
    url = {http://dx.doi.org/10.1007/978-3-319-42108-7_29},
    language = {english},
    category = {hpc},
    tags = {scopus,wos}
  }
  
  @inproceedings{sveshnikova2016dnn,
    author = {E. Milova and S. Sveshnikova and I. Gankevich},
    booktitle = {2016 International Conference on High Performance Computing
                    \& Simulation (HPCS)},
    title = {Speedup of deep neural network learning on the
                    MIC-architecture},
    year = {2016},
    pages = {989--992},
    keywords = {coprocessors;iterative methods;learning (artificial
                    intelligence);multiprocessing systems;neural nets;parallel
                    processing;storage management;MIC architecture;code
                    vectorization;computational power;coprocessor data transfer
                    model;deep neural network learning;implicit memory
                    copying;iterative process;many integrated core
                    architecture;memory blocks;parallelization method;Biological
                    neural networks;Computer architecture;Coprocessors;Data
                    models;Data transfer;Microwave integrated
                    circuits;Optimization;DNN;Xeon Phi;coprocessor;many-core
                    architecture;offload;optimisation;parallel
                    computing;vectorization},
    abstract = {Deep neural networks are more accurate, but require more
                    computational power in the learning process. Moreover, it is
                    an iterative process. The goal of the research is to
                    investigate efficiency of solving this problem on MIC
                    architecture without changing baseline algorithm. Well-known
                    code vectorization and parallelization methods are used to
                    increase the effectiveness of the program on MIC architecture.
                    In the course of the experiments we test two coprocessor data
                    transfer models: explicit and implicit one. We show that
                    implicit memory copying is more efficient than explicit one,
                    because only modified memory blocks are copied. MIC
                    architecture shows competitive performance compared to
                    multi-core x86 processor.},
    doi = {10.1109/HPCSim.2016.7568443},
    month = {July},
    language = {english},
    category = {coop},
    tags = {scopus,wos}
  }
  
  @inproceedings{gankevich2016factory,
    author = {I. Gankevich and Y. Tipikin and V. Korkhov and V. Gaiduchok},
    booktitle = {International Conference on High Performance Computing
                    \& Simulation (HPCS'16)},
    title = {Factory: Non-stop batch jobs without checkpointing},
    year = {2016},
    pages = {979--984},
    keywords = {parallel processing;software fault tolerance;checkpoint
                    mechanisms;computational microkernels;factory;hydrodynamics
                    HPC application;node failures;nonstop batch
                    jobs;Computers;Fault tolerance;Fault tolerant
                    systems;Kernel;Libraries;Message passing;Production
                    facilities;cluster computing;distributed computing;fault
                    tolerance;job scheduling;parallel computing},
    abstract = {Nowadays many job schedulers rely on checkpoint mechanisms to
                    make long-running batch jobs resilient to node failures. At
                    large scale stopping a job and creating its image consumes
                    considerable amount of time. The aim of this study is to
                    propose a method that eliminates this overhead. For this
                    purpose we decompose a problem being solved into computational
                    microkernels which have strict hierarchical dependence on each
                    other. When a kernel abruptly stops its execution due to a
                    node failure, it is responsibility of its principal to restart
                    computation on a healthy node. In the course of experiments we
                    successfully applied this method to make hydrodynamics HPC
                    application run on constantly changing number of nodes. We
                    believe, that this technique can be generalised to other types
                    of scientific applications as well.},
    doi = {10.1109/HPCSim.2016.7568441},
    month = {July},
    language = {english},
    category = {hpc},
    tags = {scopus,wos}
  }
  
  @inproceedings{sveshnikova2016mic,
    title = {Ускорение обучения глубокой
                    нейронной сети путем оптимизации
                    алгоритма для запуска на MIC
                    архитектуре},
    author = {Милова, Е.А. and Свешникова, С.Ю. and
                    Ганкевич, И.Г.},
    editor = {Смирнов, Н.В.},
    publisher = {Издательский дом Федоровой Г.В.},
    address = {СПб, Россия},
    booktitle = {Труды 47-й международной научной
                    конференции аспирантов и
                    студентов},
    abstract = {Глубокие нейронные сети
                    позволяют получить высокую
                    точность распознавания, но
                    требуют больших вычислительных
                    мощностей и временных затрат в
                    процессе обучения. Второе связано
                    с тем, что алгоритм обучения
                    является итеративным. Целью
                    исследования является изучение
                    эффективности решения данной
                    задачи на MIC архитектуре без
                    изменения базового алгоритма. Для
                    повышения эффективности работы
                    программы на MIC архитектуре были
                    использованы приемы векторизации
                    и распараллеливания кода. В
                    процессе исследования были
                    опробованы 2 модели передачи
                    данных на сопроцессор: явная и
                    неявная и проведено сравнение их
                    эффективности. Рассмотрены
                    причины, влияющие на
                    эффективность распараллеливания
                    данной задачи. MIC архитектура
                    показала
                    производительность, сравнимую с
                    многоядерным процессором. },
    year = {2016},
    volume = {3},
    number = {1},
    pages = {367--371},
    issn = {2313-7304},
    url = {http://hdl.handle.net/11701/2609},
    language = {russian},
    category = {coop},
    tags = {elibrary}
  }
  
  @inbook{korkhov2017ascheduler,
    author = {Korkhov, Vladimir and Gankevich, Ivan and Iakushkin, Oleg and
                    Gushchanskiy, Dmitry and Khmel, Dmitry and Ivashchenko, Andrey
                    and Pyayt, Alexander and Zobnin, Sergey and Loginov,
                    Alexander},
    editor = {Gervasi, Osvaldo and Murgante, Beniamino and Misra, Sanjay
                    and Borruso, Giuseppe and Torre, Carmelo M. and Rocha, Ana
                    Maria A.C. and Taniar, David and Apduhan, Bernady O. and
                    Stankova, Elena and Cuzzocrea, Alfredo},
    title = {Distributed Data Processing on Microcomputers with Ascheduler
                    and Apache Spark},
    booktitle = {Computational Science and Its Applications -- ICCSA 2017:
                    Proceedings of 17th International Conference, Part V},
    year = {2017},
    month = {July},
    publisher = {Springer International Publishing},
    address = {Cham},
    pages = {387--398},
    abstract = {Modern architectures of data acquisition and processing often
                    consider low-cost and low-power devices that can be bound
                    together to form a distributed infrastructure. In this paper
                    we overview possibilities to organize a distributed computing
                    testbed based on microcomputers similar to Raspberry Pi and
                    Intel Edison. The goal of the research is to investigate and
                    develop a scheduler for orchestrating distributed data
                    processing and general purpose computations on such unreliable
                    and resource-constrained hardware. Also we consider
                    integration of the scheduler with well-known distributed data
                    processing framework Apache Spark. We outline the project
                    carried out in collaboration with Siemens LLC to compare
                    different configurations of the hardware and software
                    deployment and evaluate performance and applicability of the
                    tools to the testbed.},
    isbn = {978-3-319-62404-4},
    doi = {10.1007/978-3-319-62404-4_28},
    url = {https://doi.org/10.1007/978-3-319-62404-4_28},
    language = {english},
    category = {hpc},
    tags = {scopus},
    nrefs = {10}
  }
  
  @inbook{ivashchenko2017gpulab,
    author = {Ivashchenko, Andrei and Belezeko, Alexey and Gankevich, Ivan
                    and Korkhov, Vladimir and Kulabukhova, Nataliia},
    editor = {Gervasi, Osvaldo and Murgante, Beniamino and Misra, Sanjay
                    and Borruso, Giuseppe and Torre, Carmelo M. and Rocha, Ana
                    Maria A.C. and Taniar, David and Apduhan, Bernady O. and
                    Stankova, Elena and Cuzzocrea, Alfredo},
    title = {Acceleration of Computing and Visualization Processes with
                    OpenCL for Standing Sea Wave Simulation Model},
    booktitle = {Computational Science and Its Applications -- ICCSA 2017:
                    Proceedings of 17th International Conference, Part V},
    year = {2017},
    month = {July},
    publisher = {Springer International Publishing},
    address = {Cham},
    pages = {505--518},
    abstract = {In this paper we highlight one of the possible acceleration
                    approaches for the standing sea wave simulation model with
                    the use of OpenCL framework for GPGPU computations. We provide
                    a description of the wave's mathematical model, an explanation
                    for the technology selection, as well as the identification of
                    the algorithm part that can be accelerated. The text also
                    contains a description of solution's performance evaluation
                    stage being compared with CPU-only program. The influence of
                    OpenCL usage for improvements in rendering process is also
                    shown here. Finally, possible ways of application improvement
                    and further development are also considered.},
    isbn = {978-3-319-62404-4},
    doi = {10.1007/978-3-319-62404-4_38},
    url = {https://doi.org/10.1007/978-3-319-62404-4_38},
    language = {english},
    category = {hpc},
    tags = {scopus},
    nrefs = {15}
  }
  
  @inproceedings{gavrikov2017paddle,
    title = {Нейронные сети в задаче
                    предсказания дорожного трафика},
    author = {Гавриков, А. and Свешникова, С. and
                    Ганкевич, И.},
    editor = {Смирнов, Н.В.},
    publisher = {Издательский дом Федоровой Г.В.},
    address = {СПб, Россия},
    booktitle = {Труды 48-й международной научной
                    конференции аспирантов и
                    студентов},
    abstract = {Нейронные сети активно
                    применяются для решения многих
                    задач, в том числе и для
                    прогнозирования различных
                    событий на основе исторических
                    данных. Одной из таких задач
                    является предсказание дорожного
                    трафика. В настоящей статье
                    рассматриваются способы
                    модификации нейронной сети для
                    получения максимально точного
                    результата. Исследование
                    затронуло как вопрос архитектуры
                    сети, так и вопрос формата подачи
                    данных для обучения. Для работы с
                    нейронной сетью был использован
                    новый фреймворк для глубокого
                    обучения PaddlePaddle. Верификация
                    полученных в результате обучения
                    данных проводилась на основе
                    среднеквадратичной ошибки
                    модели. Наилучший результат
                    показала простая нейронная сеть с
                    одним скрытым слоем. Другой
                    популярный прием в такого рода
                    задачах — учет значения в смежных
                    узлах, который улучшил
                    предсказание только при условии
                    учета дня недели и использовании
                    данных для формирования прогноза
                    за один час и тридцать минут.},
    year = {2017},
    volume = {4},
    number = {1},
    pages = {339--342},
    issn = {2313-7304},
    url = {http://hdl.handle.net/11701/7026},
    language = {russian},
    category = {coop},
    tags = {elibrary},
    nrefs = {3}
  }
  
  @inproceedings{belezeko2017masnum,
    title = {Применение технологий
                    параллельного программирования в
                    задаче моделирования волн в
                    программе {MASNUM} {WAVE}},
    author = {Белезеко, А. and Кучумов, Р. and
                    Фатькина, А. and Ганкевич, И.},
    editor = {Смирнов, Н.В.},
    publisher = {Издательский дом Федоровой Г.В.},
    address = {СПб, Россия},
    booktitle = {Труды 48-й международной научной
                    конференции аспирантов и
                    студентов},
    abstract = {В данной работе рассмотрена
                    возможность и целесообразность
                    применения двух различных
                    технологий многопроцессорной
                    обработки данных на примере
                    работы программы для
                    моделирования волн Masnum Wave.
                    Произведен анализ быстродействия
                    компонентов Masnum Wave. На основании
                    полученных результатов
                    профилирования была выделена
                    наиболее затратная по времени
                    функция, которая в дальнейшем
                    была модифицирована с
                    ориентацией на параллельное
                    выполнение. В статье представлены
                    результаты применения технологий
                    OpenACC и MPI, а также проанализирована
                    рациональность их использования
                    в рассматриваемой программе с
                    точки зрения быстродействия.
                    Тестирование производилось на
                    суперкомпьютере Sunway TaihuLight —
                    самом производительном
                    суперкомпьютере в мире за 2016 год.},
    year = {2017},
    volume = {4},
    number = {1},
    pages = {321--325},
    issn = {2313-7304},
    url = {http://hdl.handle.net/11701/7026},
    language = {russian},
    category = {coop},
    tags = {elibrary},
    nrefs = {3}
  }
  
  @inproceedings{gankevich2017subord,
    author = {I. Gankevich and Y. Tipikin and V. Korkhov},
    booktitle = {Proceedings of International Conference on High Performance
                    Computing \& Simulation (HPCS'17)},
    title = {Subordination: Providing Resilience to Simultaneous Failure
                    of Multiple Cluster Nodes},
    year = {2017},
    pages = {832--838},
    publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
    address = {NJ, USA},
    abstract = {In this paper we describe a new framework for creating
                    distributed programmes which are resilient to cluster node
                    failures. Our main goal is to create a simple and reliable
                    model, that ensures continuous execution of parallel
                    programmes without creation of checkpoints, memory dumps and
                    other I/O intensive activities. To achieve this we introduce
                    multi-layered system architecture, each layer of which
                    consists of unified entities organised into hierarchies, and
                    then show how this system handles different node failure
                    scenarios. We benchmark our system on the example of
                    real-world HPC application on both physical and virtual
                    clusters. The results of the experiments show that our
                    approach has low overhead and scales to a large number of
                    cluster nodes.},
    keywords = {telecommunication network reliability;cluster node
                    failures;distributed programmes;memory dumps;multilayered
                    system architecture;multiple cluster nodes;node failure
                    scenarios;parallel programmes;physical clusters;real-world HPC
                    application;reliable model;simple model;simultaneous
                    failure;subordination;virtual clusters;Clustering
                    algorithms;Computational modeling;Fault tolerance;Fault
                    tolerant systems;Kernel;Pipelines;Resilience},
    doi = {10.1109/HPCS.2017.126},
    month = {July},
    tags = {scopus},
    nrefs = {11}
  }
  
  @inproceedings{sveshnikova2017collector,
    author = {S. Sveshnikova and I. Gankevich},
    booktitle = {Proceedings of International Conference on High Performance
                    Computing \& Simulation (HPCS'17)},
    title = {Using Virtualisation for Reproducible Research and Code
                    Portability},
    year = {2017},
    pages = {891--892},
    publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
    address = {NJ, USA},
    abstract = {Research reproducibility is an emerging topic in computer
                    science. One of the problems in research reproducibility is
                    the absence of tools to reproduce specified operating system
                    with specific version of the software installed. In the
                    proposal reported here we investigate how a tool based on
                    lightweight virtualisation technologies reproduces them. The
                    experiments show that creating reproducible environment adds
                    significant overhead only on the first run of the application,
                    and propose a number of ways to improve the tool.},
    keywords = {Linux;program compilers;software portability;software
                    tools;source code (software);virtualisation;Linux
                    namespace;code portability;computer science;lightweight
                    virtualisation technologies;operating system;reproducible
                    environment;research reproducibility;specific software
                    version;Computational modeling;Linux;Operating
                    systems;Publishing;Tools;Linux cgroups;Linux
                    namespaces;compiler tools;lightweight virtualisation},
    doi = {10.1109/HPCS.2017.139},
    month = {July},
    tags = {scopus},
    nrefs = {6}
  }
  
  @article{gankevich2017middleware,
    author = {Gankevich, I. and Gaiduchok, V. and Korkhov, V. and
                    Degtyarev, A. and Bogdanov, A.},
    title = {Middleware for big data processing: test results},
    journal = {Physics of Particles and Nuclei Letters},
    year = {2017},
    month = {Dec},
    day = {01},
    volume = {14},
    number = {7},
    pages = {1001--1007},
    abstract = {Dealing with large volumes of data is resource-consuming work
                    which is more and more often delegated not only to a single
                    computer but also to a whole distributed computing system at
                    once. As the number of computers in a distributed system
                    increases, the amount of effort put into effective management
                    of the system grows. When the system reaches some critical
                    size, much effort should be put into improving its fault
                    tolerance. It is difficult to estimate when some particular
                    distributed system needs such facilities for a given workload,
                    so instead they should be implemented in a middleware which
                    works efficiently with a distributed system of any size. It is
                    also difficult to estimate whether a volume of data is large
                    or not, so the middleware should also work with data of any
                    volume. In other words, the purpose of the middleware is to
                    provide facilities that adapt distributed computing system for
                    a given workload. In this paper we introduce such middleware
                    appliance. Tests show that this middleware is well-suited for
                    typical HPC and big data workloads and its performance is
                    comparable with well-known alternatives.},
    publisher = {Pleiades Publishing},
    address = {Moscow, Russia},
    issn = {1531-8567},
    doi = {10.1134/S1547477117070068},
    url = {https://doi.org/10.1134/S1547477117070068},
    tags = {scopus,wos},
    nrefs = {24}
  }
  
