Ivan Gankevich

Bibliography

@inproceedings{gankevich2010para,
  author = {Ганкевич, И. Г.},
  title = {Параллельные вычисления с использованием {OpenCL}},
  booktitle = {Материалы пятой студенческой научно-учебной конференции ``Моделирование явлений в технических и гуманитарных науках'' (СНОО 2010)},
  address = {Санкт-Петербург, Россия},
  pages = {67--68},
  year = {2010},
  language = {russian},
  category = {hpc}
}
@article{gankevich2010srav,
  author = {Ганкевич, И. Г. and Дегтярев, А. Б. and {Соэ Моэ Лвин}},
  title = {Сравнение эффективности применения {MPI} и {OpenCL} для генерации волновой поверхности},
  journal = {Морские интеллектуальные технологии},
  volume = {4},
  pages = {10--13},
  year = {2010},
  language = {russian},
  category = {oceanwaves}
}
@inproceedings{degtyarev2011effi,
  author = {Degtyarev, A. and Gankevich, I.},
  title = {Efficiency Comparison of Wave Surface Generation Using {OpenCL}, {OpenMP} and {MPI}},
  booktitle = {Proceedings of 8th International Conference ``Computer Science \& Information Technologies''},
  address = {Yerevan, Armenia},
  pages = {248--251},
  year = {2011},
  language = {english},
  category = {oceanwaves}
}
@misc{bogdanov2012cpu,
  author = {Bogdanov, A. V. and Gankevich, I. G. and Gayduchok, V. Yu. and {Pyae Sone Ko Ko}},
  title = {{CPU} and {GPU} Consolidation Based on {OpenCL}},
  booktitle = {Distributed Computing and Grid-Technologies in Science and Education: Book of Abstr. Of the 5th Intern. Conf.},
  address = {Dubna, Russia},
  pages = {38},
  year = {2012},
  language = {english},
  category = {hpc},
  tags = {wos}
}
@misc{bogdanov2012prac,
  author = {Bogdanov, A. V. and Gankevich, I. G.},
  title = {Practical Efficiency of Optimizing Compilers in Parallel Scientific Applications},
  booktitle = {Distributed Computing and Grid-Technologies in Science and Education: Book of Abstr. Of the 5th Intern. Conf.},
  address = {Dubna, Russia},
  pages = {39},
  year = {2012},
  language = {english},
  category = {hpc},
  tags = {wos}
}
@misc{bogdanov2012virt,
  author = {Bogdanov, A. V. and Degtyarev, A. B. and Gaiduchok, V. Yu. and Gankevich, I. G. and Zolotarev, V. I.},
  title = {Virtual Workspace as a Basis of Supercomputer Center},
  booktitle = {Distributed Computing and Grid-Technologies in Science and Education: Book of Abstr. Of the 5th Intern. Conf.},
  address = {Dubna, Russia},
  pages = {33},
  year = {2012},
  language = {english},
  category = {virtualisation},
  tags = {wos}
}
@inproceedings{degtyarev2012eval,
  author = {Degtyarev, A. and Gankevich, I.},
  title = {Evaluation of hydrodynamic pressures for autoregression model of irregular waves},
  booktitle = {Proceedings of 11th International Conference ``Stability of Ships and Ocean Vehicles''},
  address = {Athens, Greece},
  pages = {841--852},
  year = {2012},
  language = {english},
  category = {oceanwaves}
}
@article{degtiarev2013vychi,
  author = {Дегтярев, А. Б. and Ганкевич, И. Г.},
  title = {Вычисление гидродинамических давлений под реальной морской поверхностью на основе авторегрессионной модели нерегулярного волнения},
  journal = {Труды {XLV} НТК ``Проблемы мореходных качеств судов, корабельной гидромеханики и освоения шельфа'' (Крыловские чтения)},
  pages = {25--29},
  year = {2013},
  language = {russian},
  category = {oceanwaves}
}
@misc{degtyarev2013hydr,
  author = {Degtyarev, A. and Gankevich, I.},
  title = {Hydrodynamic pressure computation under real sea surface on basis of autoregressive model of irregular waves},
  booktitle = {Book of Abstr. of the international conference ``Mathematical Modeling and Computational Physics'' ({MMCP}'13)},
  address = {Dubna, Russia},
  pages = {65},
  year = {2013},
  language = {english},
  category = {oceanwaves}
}
@inproceedings{gankevich2013mic,
  author = {Gankevich, I. G.},
  title = {{MIC} architecture performance benchmark in problem of solving stochastic differential equations},
  booktitle = {Proceedings of the {XLIV} annual international conference ``Control Processes and Stability'' ({CPS}'13)},
  address = {Saint Petersburg, Russia},
  year = {2013},
  language = {english},
  category = {hpc}
}
@inproceedings{gankevich2013mode,
  author = {Gankevich, Ivan and Degtyarev, Alexander},
  title = {Model of distributed computations in virtual testbed},
  booktitle = {Proceedings of {IX} International Conference on Computer Science and Information Technologies ({CSIT}'2013)},
  address = {Yerevan, Armenia},
  pages = {240--244},
  year = {2013},
  language = {english},
  category = {hpc}
}
@inproceedings{gankevich2013virt,
  author = {Gankevich, I. and Gaiduchok, V. and Gushchanskiy, D. and Tipikin, Yu. and Korkhov, V. and Degtyarev, A. and Bogdanov, A. and Zolotarev, V.},
  title = {Virtual private supercomputer: Design and evaluation},
  booktitle = {{IEEE} conference publication},
  volume = {6710358},
  pages = {1--6},
  year = {2013},
  language = {english},
  category = {virtualisation},
  tags = {scopus,wos}
}
@inproceedings{ishkulov2013tool,
  author = {Ishkulov, Ilarion and Degtyarev, Alexander and Gankevich, Ivan and Gaiduchok, Vladimir and Tipikin, Yury and Korkhov, Vladimir},
  title = {Tools for Modelling and Simulation Cloud Computing Infrastructure},
  booktitle = {International Conference on Computer Science and Information Technologies},
  pages = {298--301},
  year = {2013},
  language = {english},
  category = {virtualisation}
}
@inproceedings{degtyarev2014calc,
  author = {Degtyarev, A. and Gankevich, I.},
  title = {Calculation Scheme for Wave Pressures with Autoregression Method},
  booktitle = {14th International Ship Stability Workshop},
  pages = {135--139},
  year = {2014},
  language = {english},
  category = {oceanwaves}
}
@misc{gaiduchok2014runn,
  author = {Gaiduchok, V. and Yuzhanin, N. and Gankevich, I. and Bogdanov, A.},
  title = {Running Applications on a Hybrid Cluster},
  booktitle = {Book of abstracts of the 6th International Conference on Distributed Computing and Grid Technologies in Science and Education},
  address = {Dubna},
  pages = {24},
  year = {2014},
  language = {english},
  category = {hpc}
}
@misc{gankevich2014appl,
  author = {Gankevich, I. and Balyan, S. and Abrahamyan, S. and Korkhov, V.},
  title = {Applications of on-demand virtual clusters to high performance computing},
  booktitle = {Book of abstracts of the 6th International Conference on Distributed Computing and Grid Technologies in Science and Education},
  address = {Dubna},
  year = {2014},
  language = {english},
  category = {virtualisation}
}
@article{gankevich2014cons,
  author = {Gankevich, Ivan and Korkhov, Vladimir and Balyan, Serob and Gaiduchok, Vladimir and Gushchanskiy, Dmitry and Tipikin, Yuri and Degtyarev, Alexander and Bogdanov, Alexander},
  title = {Constructing Virtual Private Supercomputer Using Virtualization and Cloud Technologies},
  journal = {Lecture Notes in Computer Science},
  volume = {8584},
  pages = {341--354},
  year = {2014},
  language = {english},
  category = {virtualisation},
  tags = {scopus,wos}
}
@misc{gankevich2014effi,
  author = {Gankevich, I. and Degtyarev, A.},
  title = {Efficient processing and classification of wave energy spectrum data with a distributed pipeline},
  booktitle = {Book of abstracts of the 6th International Conference on Distributed Computing and Grid Technologies in Science and Education},
  address = {Dubna},
  pages = {25},
  year = {2014},
  language = {english},
  category = {hpc}
}
@article{gankevich2014meto,
  author = {Ганкевич, И. Г. and Дегтярев, А. Б.},
  title = {Методы распределения нагрузки на многопроцессорную систему},
  journal = {Процессы управления и устойчивость},
  volume = {1},
  number = {17},
  pages = {295--300},
  year = {2014},
  language = {russian},
  category = {hpc}
}
@article{gankevich2015spec,
  author = {Ivan Gankevich and Alexander Degtyarev},
  title = {Efficient processing and classification of wave energy spectrum data with a distributed pipeline},
  journal = {Computer Research and Modeling},
  publisher = {Institute of Computer Science},
  volume = {7},
  number = {3},
  pages = {517--520},
  year = {2015},
  keywords = {distributed system, big data, data processing, parallel computing},
  abstract = {Processing of large amounts of data often consists of several steps, e.g. pre- and post-processing stages, which are executed sequentially with data written to disk after each step, however, when pre-processing stage for each task is different the more efficient way of processing data is to construct a pipeline which streams data from one stage to another. In a more general case some processing stages can be factored into several parallel subordinate stages thus forming a distributed pipeline where each stage can have multiple inputs and multiple outputs. Such processing pattern emerges in a problem of classification of wave energy spectra based on analytic approximations which can extract different wave systems and their parameters (e.g. wave system type, mean wave direction) from spectrum. Distributed pipeline approach achieves good performance compared to conventional ``sequential-stage'' processing.},
  url = {http://crm-en.ics.org.ru/journal/article/2301/},
  language = {english},
  category = {hpc},
  tags = {elibrary}
}
@inbook{bogdanov2015vsc,
  author = {Alexander Bogdanov and Alexander Degtyarev and Vladimir Korkhov and Vladimir Gaiduchok and Ivan Gankevich},
  editor = {Thomas S. Clary},
  title = {Virtual Supercomputer as basis of Scientific Computing},
  booktitle = {Horizons in Computer Science Research},
  chapter = {5},
  volume = {11},
  pages = {159--198},
  publisher = {Nova Science Publishers},
  year = {2015},
  isbn = {978-1-63482-499-6},
  abstract = {Nowadays supercomputer centers strive to provide their computational resources as services, however, present infrastructure is not particularly suited for such a use. First of all, there are standard application programming interfaces to launch computational jobs via command line or a web service, which work well for a program but turn out to be too complex for scientists: they want applications to be delivered to them from a remote server and prefer to interact with them via graphical interface. Second, there are certain applications which are dependent on older versions of operating systems and libraries and it is either non-practical to install those old systems on a cluster or there exists some conflict between these dependencies. Virtualization technologies can solve this problem, but they are not too popular in scientific computing due to overheads introduced by them. Finally, it is difficult to automatically estimate optimal resource pool size for a particular task, thus it often gets done manually by a user. If the large resource pool is requested for a minor task, the efficiency degrades. Moreover, cluster schedulers depend on estimated wall time to execute the jobs and since it cannot be reliably predicted by a human or a machine their efficiency suffers as well.

Applications delivery, efficient operating system virtualization and dynamic application resource pool size defining constitute the two problems of scientific computing: complex application interfaces and inefficient use of resources available --- and virtual supercomputer is the way to solve them. The research shows that there are ways to make virtualization technologies efficient for scientific computing: the use of lightweight application containers and dynamic creation of these containers for a particular job are both fast and transparent for a user. There are universal ways to deliver application output to a front-end using execution of a job on a cluster and presenting its results in a graphical form. Finally, an application framework can be developed to decompose parallel application into small independent parts with easily predictable execution time, to simplify scheduling via existing algorithms.

The aim of this chapter is to promote the key idea of a virtual supercomputer: to harness all available HPC resources and provide users with convenient access to them. Such a challenge can be effectively faced using contemporary virtualization technologies. They can materialize the long-term dream of having a supercomputer at your own desk. },
  language = {english},
  category = {virtualisation},
  tags = {scopus}
}
@article{bogdanov2015hybrid,
  author = {Bogdanov, A. and Gankevich, I. and Gayduchok, V. and Yuzhanin, N.},
  title = {Running applications on a hybrid cluster},
  journal = {Computer Research and Modeling},
  publisher = {Institute of Computer Science},
  volume = {7},
  number = {3},
  pages = {475--483},
  year = {2015},
  keywords = {GPGPU, HPC, computational clusters, OpenFOAM, LINPACK, ViennaCL, CUDA, OpenCL},
  abstract = {A hybrid cluster implies the use of computational devices with radically different architectures. Usually, these are conventional CPU architecture (e.g. {x86\_64}) and GPU architecture (e. g. NVIDIA CUDA). Creating and exploiting such a cluster requires some experience: in order to harness all computational power of the described system and get substantial speedup for computational tasks many factors should be taken into account. These factors consist of hardware characteristics (e.g. network infrastructure, a type of data storage, GPU architecture) as well as software stack (e.g. MPI implementation, GPGPU libraries). So, in order to run scientific applications GPU capabilities, software features, task size and other factors should be considered.

This report discusses opportunities and problems of hybrid computations. Some statistics from tests programs and applications runs will be demonstrated. The main focus of interest is open source applications (e. g. OpenFOAM) that support GPGPU (with some parts rewritten to use GPGPU directly or by replacing libraries).

There are several approaches to organize heterogeneous computations for different GPU architectures out of which CUDA library and OpenCL framework are compared. CUDA library is becoming quite typical for hybrid systems with NVIDIA cards, but OpenCL offers portability opportunities which can be a determinant factor when choosing framework for development. We also put emphasis on multi-GPU systems that are often used to build hybrid clusters. Calculations were performed on a hybrid cluster of SPbU computing center.},
  url = {http://crm-en.ics.org.ru/journal/article/2295/},
  language = {english},
  category = {coop},
  tags = {elibrary}
}
@article{gankevich2015virtcluster,
  author = {Gankevich, I. and Balyan, S. and Abrahamyan, S. and Korkhov, V.},
  title = {Applications of on-demand virtual clusters to high performance computing},
  journal = {Computer Research and Modeling},
  publisher = {Institute of Computer Science},
  volume = {7},
  number = {3},
  pages = {511--516},
  year = {2015},
  keywords = {virtual machine, lightweight virtualisation, application containers},
  abstract = {Virtual machines are usually associated with an ability to create them on demand by calling web services, then these machines are used to deliver resident services to their clients; however, providing clients with an ability to run an arbitrary programme on the newly created machines is beyond their power. Such kind of usage is useful in a high performance computing environment where most of the resources are consumed by batch programmes and not by daemons or services. In this case a cluster of virtual machines is created on demand to run a distributed or parallel programme and to save its output to a network attached storage. Upon completion this cluster is destroyed and resources are released. With certain modifications this approach can be extended to interactively deliver computational resources to the user thus providing virtual desktop as a service. Experiments show that the process of creating virtual clusters on demand can be made efficient in both cases. },
  url = {http://crm-en.ics.org.ru/journal/article/2300/},
  language = {english},
  category = {virtualisation},
  tags = {elibrary}
}
@article{yuzhanin2015tasktrack,
  author = {Yuzhanin, N. and Tipikin, Yu. and Gankevich, I. and Zolotarev, V.},
  title = {Computational task tracking complex in the scientific project informational support system},
  journal = {Computer Research and Modeling},
  publisher = {Institute of Computer Science},
  volume = {7},
  number = {3},
  pages = {615--620},
  year = {2015},
  keywords = {service desk, task tracking, HPC, web service},
  abstract = {This work describes the idea of the system of informational support for the scientific projects and the development of computational task tracking complex. Due to large requirements for computational experiments the problem of presentation of the information about HPC tasks becomes one of the most important. Nonstandard usage of the service desk system as a basis of the computational task tracking and support system can be the solution of this problem. Particular attention is paid to the analysis and the satisfaction of the conflicting requirements to the task tracking complex from the different user groups. Besides the web service kit used for the integration of the task tracking complex and the datacenter environment is considered. This service kit became the main interconnect between the parts of the scientific project support system and also this kit allows to reconfigure the whole system quickly and safely.},
  url = {http://crm-en.ics.org.ru/journal/article/2316/},
  language = {english},
  category = {coop},
  tags = {scopus}
}
@inproceedings{gankevich2015subord,
  author = {Gankevich, Ivan and Tipikin, Yuri and Gaiduchok, Vladimir},
  title = {Subordination: Cluster management without distributed consensus},
  booktitle = {International Conference on High Performance Computing Simulation (HPCS)},
  pages = {639--642},
  year = {2015},
  doi = {10.1109/HPCSim.2015.7237106},
  keywords = {Clustering algorithms;Computers;Heuristic algorithms;IP networks;Network topology;Nominations and elections;Topology;cluster accounting;cluster management;cluster monitoring;job scheduling;leader election},
  abstract = {Nowadays, many cluster management systems rely on distributed consensus algorithms to elect a leader that orchestrates subordinate nodes. Contrary to these studies we propose consensus-free algorithm that arranges cluster nodes into multiple levels of subordination. The algorithm structures IP address range of cluster network so that each node has ranked list of candidates, from which it chooses a leader. The results show that this approach easily scales to a large number of nodes due to its asynchronous nature, and enables fast recovery from node failures as they occur only on one level of hierarchy. Multiple levels of subordination are useful for efficiently collecting monitoring and accounting data from large number of nodes, and for scheduling general-purpose tasks on a cluster. },
  note = {Outstanding poster paper award.},
  language = {english},
  category = {hpc},
  tags = {scopus,wos}
}
@incollection{bogdanov2015sched,
  author = {Bogdanov, Alexander and Gaiduchok, Vladimir and Ahmed, Nabil and Cubahiro, Amissi and Gankevich, Ivan},
  editor = {Gervasi, Osvaldo and Murgante, Beniamino and Misra, Sanjay and Gavrilova, Marina L. and Rocha, Ana Maria Alves Coutinho and Torre, Carmelo and Taniar, David and Apduhan, Bernady O.},
  title = {Profiling Scheduler for Efficient Resource Utilization},
  booktitle = {Computational Science and Its Applications -- ICCSA 2015},
  series = {Lecture Notes in Computer Science},
  volume = {9158},
  publisher = {Springer International Publishing},
  pages = {299--310},
  year = {2015},
  isbn = {978-3-319-21409-2},
  doi = {10.1007/978-3-319-21410-8_23},
  keywords = {Computational cluster; Scheduler; HPC; Profiling; Resource sharing; Load balancing; Networking},
  abstract = {Optimal resource utilization is one of the most important and most challenging tasks for computational centers. A typical contemporary center includes several clusters. These clusters are used by many clients. So, administrators should set resource sharing policies that will meet different requirements of different groups of users. Users want to compute their tasks fast while organizations want their resources to be utilized efficiently. Traditional schedulers do not allow administrator to efficiently solve these problems in that way. Dynamic resource reallocation can improve the efficiency of system utilization while profiling running applications can generate important statistical data that can be used in order to optimize future application usage. These are basic advantages of a new scheduler that are discussed in this paper. },
  language = {english},
  category = {coop},
  tags = {scopus,wos}
}
@inproceedings{yuzhanin2015asian,
  author = {Yuzhanin, Artur and Gankevich, Ivan and Stepanov, Eduard and Korkhov, Vladimir},
  title = {Efficient Asian option pricing with CUDA},
  booktitle = {International Conference on High Performance Computing Simulation (HPCS)},
  pages = {623--628},
  year = {2015},
  doi = {10.1109/HPCSim.2015.7237103},
  keywords = {Arrays;Graphics processing units;Instruction sets;Kernel;Mathematical model;Pricing;Random variables},
  abstract = {In this paper the Monte Carlo methods of the Asian option pricing are considered. Among them are pricing method with path integral and partial differential equation. Simulation algorithms running on the CPU sequentially and algorithms running on the GPU in parallel using the CUDA technology were analyzed and compared. },
  language = {english},
  category = {coop},
  tags = {scopus,wos}
}
@incollection{gankevich2015novel,
  author = {Gankevich, Ivan and Tipikin, Yuri and Degtyarev, Alexander and Korkhov, Vladimir},
  editor = {Gervasi, Osvaldo and Murgante, Beniamino and Misra, Sanjay and Gavrilova, Marina L. and Rocha, Ana Maria Alves Coutinho and Torre, Carmelo and Taniar, David and Apduhan, Bernady O.},
  title = {Novel Approaches for Distributing Workload on Commodity Computer Systems},
  booktitle = {Computational Science and Its Applications -- ICCSA 2015},
  series = {Lecture Notes in Computer Science},
  volume = {9158},
  publisher = {Springer International Publishing},
  pages = {259--271},
  year = {2015},
  isbn = {978-3-319-21409-2},
  doi = {10.1007/978-3-319-21410-8_20},
  keywords = {Long-lived transactions; Distributed pipeline; Node discovery; Software engineering; Distributed computing; Cluster computing},
  language = {english},
  category = {hpc},
  tags = {scopus,wos}
}
@article{degtyarev2015hydr,
  author = {Degtyarev, A. and Gankevich, I.},
  title = {Hydrodynamic Pressure Computation under Real Sea Surface on Basis of Autoregressive Model of Irregular Waves},
  journal = {Physics of Particles and Nuclei Letters},
  publisher = {Pleiades Publishing},
  volume = {12},
  number = {3},
  pages = {389--391},
  year = {2015},
  doi = {10.1134/S1547477115030073},
  abstract = {Determining the impact of external excitations on a dynamic marine object such as ship hull in a seaway is the main goal of simulations. Now such simulations is most often based on approximate mathematical models that use results of the theory of small amplitude waves. The most complicated software for marine objects behavior simulation LAMP IV uses numerical solution of traditional hydrodynamic problem without often used approximations but on the basis of theory of small amplitude waves. For efficiency reasons these simulations can be based on autoregressive model to generate real wave surface. Such a surface possesses all the hydrodynamic characteristics of sea waves, preserves dispersion relation and also shows superior performance compared to other wind wave models. Naturally, the known surface can be used to compute velocity field and in turn to determine pressures in any point under sea surface. The resulting computational algorithm can be used to determine pressures without use of theory of small-amplitude waves. },
  language = {english},
  category = {oceanwaves},
  tags = {scopus,wos}
}
@inproceedings{gaiduchok2016network,
  author = {Bogdanov, A. and Gaiduchok, V. and Ahmed, N. and Ivanov, P. and Gankevich, I.},
  editor = {Vladimir Korenkov and Tatiana Zaikina and Andrey Nechaevskiy},
  title = {Improving networking performance of a Linux cluster},
  booktitle = {Proceedings of the 7th International Conference Distributed Computing and Grid-technologies in Science and Education 2016},
  series = {CEUR Workshop Proceedings},
  volume = {1787},
  publisher = {CEUR-WS.org},
  issn = {1613-0073},
  url = {http://ceur-ws.org/Vol-1787/},
  year = {2016},
  language = {english},
  category = {coop},
  tags = {scopus}
}
@inbook{degtyarev2016balance,
  author = {Degtyarev, Alexander and Gankevich, Ivan},
  editor = {Gavrilova, Marina L. and Tan, C. J. Kenneth},
  title = {Balancing Load on a Multiprocessor System with Event-Driven Approach},
  booktitle = {Transactions on Computational Science XXVII},
  publisher = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  pages = {35--52},
  year = {2016},
  isbn = {978-3-662-50412-3},
  doi = {10.1007/978-3-662-50412-3_3},
  language = {english},
  category = {hpc},
  tags = {scopus}
}
@inbook{yuzhanin2016servicedesk,
  author = {Bogdanov, A. V. and Gaiduchok, V. Yu. and Gankevich, I. G. and Tipikin, Yu. A. and Yuzhanin, N. V.},
  editor = {Gavrilova, Marina L. and Tan, C. J. Kenneth},
  title = {The Use of Service Desk System to Keep Track of Computational Tasks on Supercomputers},
  booktitle = {Transactions on Computational Science XXVII},
  publisher = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  pages = {1--9},
  year = {2016},
  isbn = {978-3-662-50412-3},
  doi = {10.1007/978-3-662-50412-3_1},
  language = {english},
  category = {hpc},
  tags = {scopus}
}
@inbook{gankevich2016master,
  author = {Gankevich, Ivan and Tipikin, Yuri and Korkhov, Vladimir and Gaiduchok, Vladimir and Degtyarev, Alexander and Bogdanov, Alexander},
  editor = {Gervasi, Osvaldo and Murgante, Beniamino and Misra, Sanjay and Rocha, Ana Maria A. C. and Torre, Carmelo M. and Taniar, David and Apduhan, Bernady O. and Stankova, Elena and Wang, Shangguang},
  title = {Factory: Master Node High-Availability for Big Data Applications and Beyond},
  booktitle = {Computational Science and Its Applications -- ICCSA 2016: 16th International Conference, Beijing, China, July 4-7, 2016, Proceedings, Part II},
  publisher = {Springer International Publishing},
  pages = {379--389},
  year = {2016},
  isbn = {978-3-319-42108-7},
  doi = {10.1007/978-3-319-42108-7_29},
  language = {english},
  category = {hpc},
  tags = {scopus,wos}
}
@inproceedings{sveshnikova2016dnn,
  author = {Milova, E. and Sveshnikova, S. and Gankevich, I.},
  booktitle = {2016 International Conference on High Performance Computing
                  \& Simulation (HPCS)},
  title = {Speedup of deep neural network learning on the
                  {MIC}-architecture},
  year = {2016},
  pages = {989--992},
  keywords = {coprocessors;iterative methods;learning (artificial
                  intelligence);multiprocessing systems;neural nets;parallel
                  processing;storage management;MIC architecture;code
                  vectorization;computational power;coprocessor data transfer
                  model;deep neural network learning;implicit memory
                  copying;iterative process;many integrated core
                  architecture;memory blocks;parallelization method;Biological
                  neural networks;Computer architecture;Coprocessors;Data
                  models;Data transfer;Microwave integrated
                  circuits;Optimization;DNN;Xeon Phi;coprocessor;many-core
                  architecture;offload;optimisation;parallel
                  computing;vectorization},
  abstract = {Deep neural networks are more accurate, but require more
                  computational power in the learning process. Moreover, it is
                  an iterative process. The goal of the research is to
                  investigate efficiency of solving this problem on MIC
                  architecture without changing baseline algorithm. Well-known
                  code vectorization and parallelization methods are used to
                  increase the effectiveness of the program on MIC architecture.
                  In the course of the experiments we test two coprocessor data
                  transfer models: explicit and implicit one. We show that
                  implicit memory copying is more efficient than explicit one,
                  because only modified memory blocks are copied. MIC
                  architecture shows competitive performance compared to
                  multi-core x86 processor.},
  doi = {10.1109/HPCSim.2016.7568443},
  month = jul,
  language = {english},
  category = {coop},
  tags = {scopus,wos}
}
@inproceedings{gankevich2016factory,
  author = {Gankevich, I. and Tipikin, Y. and Korkhov, V. and Gaiduchok, V.},
  booktitle = {International Conference on High Performance Computing
                  \& Simulation (HPCS'16)},
  title = {Factory: Non-stop batch jobs without checkpointing},
  year = {2016},
  pages = {979--984},
  keywords = {parallel processing;software fault tolerance;checkpoint
                  mechanisms;computational microkernels;factory;hydrodynamics
                  HPC application;node failures;nonstop batch
                  jobs;Computers;Fault tolerance;Fault tolerant
                  systems;Kernel;Libraries;Message passing;Production
                  facilities;cluster computing;distributed computing;fault
                  tolerance;job scheduling;parallel computing},
  abstract = {Nowadays many job schedulers rely on checkpoint mechanisms to
                  make long-running batch jobs resilient to node failures. At
                  large scale stopping a job and creating its image consumes
                  considerable amount of time. The aim of this study is to
                  propose a method that eliminates this overhead. For this
                  purpose we decompose a problem being solved into computational
                  microkernels which have strict hierarchical dependence on each
                  other. When a kernel abruptly stops its execution due to a
                  node failure, it is responsibility of its principal to restart
                  computation on a healthy node. In the course of experiments we
                  successfully applied this method to make hydrodynamics HPC
                  application run on constantly changing number of nodes. We
                  believe, that this technique can be generalised to other types
                  of scientific applications as well.},
  doi = {10.1109/HPCSim.2016.7568441},
  month = jul,
  language = {english},
  category = {hpc},
  tags = {scopus,wos}
}
@inproceedings{sveshnikova2016mic,
  title = {Ускорение обучения глубокой
                  нейронной сети путем оптимизации
                  алгоритма для запуска на {MIC}
                  архитектуре},
  author = {Милова, Е. А. and Свешникова, С. Ю. and
                  Ганкевич, И. Г.},
  editor = {Смирнов, Н. В.},
  publisher = {Издательский дом Федоровой Г.В.},
  address = {СПб, Россия},
  booktitle = {Труды 47-й международной научной
                  конференции аспирантов и
                  студентов},
  abstract = {Глубокие нейронные сети
                  позволяют получить высокую
                  точность распознавания, но
                  требуют больших вычислительных
                  мощностей и временных затрат в
                  процессе обучения. Второе связано
                  с тем, что алгоритм обучения
                  является итеративным. Целью
                  исследования является изучение
                  эффективности решения данной
                  задачи на MIC архитектуре без
                  изменения базового алгоритма. Для
                  повышения эффективности работы
                  программы на MIC архитектуре были
                  использованы приемы векторизации
                  и распараллеливания кода. В
                  процессе исследования были
                  опробованы 2 модели передачи
                  данных на сопроцессор: явная и
                  неявная и проведено сравнение их
                  эффективности. Рассмотрены
                  причины, влияющие на
                  эффективность распараллеливания
                  данной задачи. MIC архитектура
                  показала
                  производительность, сравнимую с
                  многоядерным процессором.},
  year = {2016},
  volume = {3},
  number = {1},
  pages = {367--371},
  issn = {2313-7304},
  url = {http://hdl.handle.net/11701/2609},
  language = {russian},
  category = {coop},
  tags = {elibrary}
}
@inbook{korkhov2017ascheduler,
  author = {Korkhov, Vladimir and Gankevich, Ivan and Iakushkin, Oleg and
                  Gushchanskiy, Dmitry and Khmel, Dmitry and Ivashchenko, Andrey
                  and Pyayt, Alexander and Zobnin, Sergey and Loginov,
                  Alexander},
  editor = {Gervasi, Osvaldo and Murgante, Beniamino and Misra, Sanjay
                  and Borruso, Giuseppe and Torre, Carmelo M. and Rocha, Ana
                  Maria A.C. and Taniar, David and Apduhan, Bernady O. and
                  Stankova, Elena and Cuzzocrea, Alfredo},
  title = {Distributed Data Processing on Microcomputers with {Ascheduler}
                  and {Apache Spark}},
  booktitle = {Computational Science and Its Applications -- ICCSA 2017:
                  Proceedings of 17th International Conference, Part V},
  year = {2017},
  month = jul,
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {387--398},
  abstract = {Modern architectures of data acquisition and processing often
                  consider low-cost and low-power devices that can be bound
                  together to form a distributed infrastructure. In this paper
                  we overview possibilities to organize a distributed computing
                  testbed based on microcomputers similar to Raspberry Pi and
                  Intel Edison. The goal of the research is to investigate and
                  develop a scheduler for orchestrating distributed data
                  processing and general purpose computations on such unreliable
                  and resource-constrained hardware. Also we consider
                  integration of the scheduler with well-known distributed data
                  processing framework Apache Spark. We outline the project
                  carried out in collaboration with Siemens LLC to compare
                  different configurations of the hardware and software
                  deployment and evaluate performance and applicability of the
                  tools to the testbed.},
  isbn = {978-3-319-62404-4},
  doi = {10.1007/978-3-319-62404-4_28},
  language = {english},
  category = {hpc},
  tags = {scopus},
  nrefs = {10}
}
@inbook{ivashchenko2017gpulab,
  author = {Ivashchenko, Andrei and Belezeko, Alexey and Gankevich, Ivan
                  and Korkhov, Vladimir and Kulabukhova, Nataliia},
  editor = {Gervasi, Osvaldo and Murgante, Beniamino and Misra, Sanjay
                  and Borruso, Giuseppe and Torre, Carmelo M. and Rocha, Ana
                  Maria A.C. and Taniar, David and Apduhan, Bernady O. and
                  Stankova, Elena and Cuzzocrea, Alfredo},
  title = {Acceleration of Computing and Visualization Processes with
                  {OpenCL} for Standing Sea Wave Simulation Model},
  booktitle = {Computational Science and Its Applications -- ICCSA 2017:
                  Proceedings of 17th International Conference, Part V},
  year = {2017},
  month = jul,
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {505--518},
  abstract = {In this paper we highlight one of the possible acceleration
                  approaches for the standing wave model simulation model with
                  the use of OpenCL framework for GPGPU computations. We provide
                  a description of the wave's mathematical model, an explanation
                  for the technology selection, as well as the identification of
                  the algorithm part that can be accelerated. The text also
                  contains a description of solution's performance evaluation
                  stage being compared with CPU-only program. The influence of
                  OpenCL usage for improvements in rendering process is also
                  shown here. Finally, possible ways of application improvement
                  and further development are also considered.},
  isbn = {978-3-319-62404-4},
  doi = {10.1007/978-3-319-62404-4_38},
  language = {english},
  category = {hpc},
  tags = {scopus},
  nrefs = {15}
}
@inproceedings{gavrikov2017paddle,
  title = {Нейронные сети в задаче
                  предсказания дорожного трафика},
  author = {Гавриков, А. and Свешникова, С. and
                  Ганкевич, И.},
  editor = {Смирнов, Н.В.},
  publisher = {Издательский дом Федоровой Г.В.},
  address = {СПб, Россия},
  booktitle = {Труды 48-й международной научной
                  конференции аспирантов и
                  студентов},
  abstract = {Нейронные сети активно
                  применяются для решения многих
                  задач, в том числе и для
                  прогнозирования различных
                  событий на основе исторических
                  данных. Одной из таких задач
                  является предсказание дорожного
                  трафика. В настоящей статье
                  рассматриваются способы
                  модификации нейронной сети для
                  получения максимально точного
                  результата. Исследование
                  затронуло как вопрос архитектуры
                  сети, так и вопрос формата подачи
                  данных для обучения. Для работы с
                  нейронной сетью был использован
                  новый фреймворк для глубокого
                  обучения PaddlePaddle. Верификация
                  полученных в результате обучения
                  данных проводилась на основе
                  среднеквадратичной ошибки
                  модели. Наилучший результат
                  показала простая нейронная сеть с
                  одним скрытым слоем. Другой
                  популярный прием в такого рода
                  задачах — учет значения в смежных
                  узлах, который улучшил
                  предсказание только при условии
                  учета дня недели и использовании
                  данных для формирования прогноза
                  за один час и тридцать минут.},
  year = {2017},
  volume = {4},
  number = {1},
  pages = {339--342},
  issn = {2313-7304},
  url = {http://hdl.handle.net/11701/7026},
  language = {russian},
  category = {coop},
  tags = {elibrary},
  nrefs = {3}
}
@inproceedings{belezeko2017masnum,
  title = {Применение технологий
                  параллельного программирования в
                  задаче моделирования волн в
                  программе {MASNUM} {WAVE}},
  author = {Белезеко, А. and Кучумов, Р. and
                  Фатькина, А. and Ганкевич, И.},
  editor = {Смирнов, Н.В.},
  publisher = {Издательский дом Федоровой Г.В.},
  address = {СПб, Россия},
  booktitle = {Труды 48-й международной научной
                  конференции аспирантов и
                  студентов},
  abstract = {В данной работе рассмотрена
                  возможность и целесообразность
                  применения двух различных
                  технологий многопроцессорной
                  обработки данных на примере
                  работы программы для
                  моделирования волн Masnum Wave.
                  Произведен анализ быстродействия
                  компонентов Masnum Wave. На основании
                  полученных результатов
                  профилирования была выделена
                  наиболее затратная по времени
                  функция, которая в дальнейшем
                  была модифицирована с
                  ориентацией на параллельное
                  выполнение. В статье представлены
                  результаты применения технологий
                  OpenACC и MPI, а также проанализирована
                  рациональность их использования
                  в рассматриваемой программе с
                  точки зрения быстродействия.
                  Тестирование производилось на
                  суперкомпьютере Sunway TaihuLight —
                  самом производительном
                  суперкомпьютере в мире за 2016 год.},
  year = {2017},
  volume = {4},
  number = {1},
  pages = {321--325},
  issn = {2313-7304},
  url = {http://hdl.handle.net/11701/7026},
  internal-note = {NOTE(review): this url is byte-identical to the url of
                  entry gavrikov2017paddle (hdl 11701/7026), which is a
                  different paper in the same proceedings -- presumably a
                  copy-paste duplicate; verify the correct handle for this
                  paper},
  language = {russian},
  category = {coop},
  tags = {elibrary},
  nrefs = {3}
}
@inproceedings{gankevich2017subord,
  author = {Gankevich, I. and Tipikin, Y. and Korkhov, V.},
  booktitle = {Proceedings of International Conference on High Performance
                  Computing \& Simulation (HPCS'17)},
  title = {Subordination: Providing Resilience to Simultaneous Failure
                  of Multiple Cluster Nodes},
  year = {2017},
  pages = {832--838},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  address = {NJ, USA},
  abstract = {In this paper we describe a new framework for creating
                  distributed programmes which are resilient to cluster node
                  failures. Our main goal is to create a simple and reliable
                  model, that ensures continuous execution of parallel
                  programmes without creation of checkpoints, memory dumps and
                  other I/O intensive activities. To achieve this we introduce
                  multi-layered system architecture, each layer of which
                  consists of unified entities organised into hierarchies, and
                  then show how this system handles different node failure
                  scenarios. We benchmark our system on the example of
                  real-world HPC application on both physical and virtual
                  clusters. The results of the experiments show that our
                  approach has low overhead and scales to a large number of
                  cluster nodes.},
  keywords = {telecommunication network reliability;cluster node
                  failures;distributed programmes;memory dumps;multilayered
                  system architecture;multiple cluster nodes;node failure
                  scenarios;parallel programmes;physical clusters;real-world HPC
                  application;reliable model;simple model;simultaneous
                  failure;subordination;virtual clusters;Clustering
                  algorithms;Computational modeling;Fault tolerance;Fault
                  tolerant systems;Kernel;Pipelines;Resilience},
  doi = {10.1109/HPCS.2017.126},
  month = jul,
  language = {english},
  tags = {scopus},
  nrefs = {11}
}
@inproceedings{sveshnikova2017collector,
  author = {Sveshnikova, S. and Gankevich, I.},
  booktitle = {Proceedings of International Conference on High Performance
                  Computing \& Simulation (HPCS'17)},
  title = {Using Virtualisation for Reproducible Research and Code
                  Portability},
  year = {2017},
  pages = {891--892},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  address = {NJ, USA},
  abstract = {Research reproducibility is an emerging topic in computer
                  science. One of the problems in research reproducibility is
                  the absence of tools to reproduce specified operating system
                  with specific version of the software installed. In the
                  proposal reported here we investigate how a tool based on
                  lightweight virtualisation technologies reproduces them. The
                  experiments show that creating reproducible environment adds
                  significant overhead only on the first run of the application,
                  and propose a number of ways to improve the tool.},
  keywords = {Linux;program compilers;software portability;software
                  tools;source code (software);virtualisation;Linux
                  namespace;code portability;computer science;lightweight
                  virtualisation technologies;operating system;reproducible
                  environment;research reproducibility;specific software
                  version;Computational modeling;Linux;Operating
                  systems;Publishing;Tools;Linux cgroups;Linux
                  namespaces;compiler tools;lightweight virtualisation},
  doi = {10.1109/HPCS.2017.139},
  month = jul,
  language = {english},
  tags = {scopus},
  nrefs = {6}
}
@article{gankevich2017middleware,
  author = {Gankevich, I. and Gaiduchok, V. and Korkhov, V. and
                  Degtyarev, A. and Bogdanov, A.},
  title = {Middleware for big data processing: test results},
  journal = {Physics of Particles and Nuclei Letters},
  year = {2017},
  month = dec,
  day = {01},
  volume = {14},
  number = {7},
  pages = {1001--1007},
  abstract = {Dealing with large volumes of data is resource-consuming work
                  which is more and more often delegated not only to a single
                  computer but also to a whole distributed computing system at
                  once. As the number of computers in a distributed system
                  increases, the amount of effort put into effective management
                  of the system grows. When the system reaches some critical
                  size, much effort should be put into improving its fault
                  tolerance. It is difficult to estimate when some particular
                  distributed system needs such facilities for a given workload,
                  so instead they should be implemented in a middleware which
                  works efficiently with a distributed system of any size. It is
                  also difficult to estimate whether a volume of data is large
                  or not, so the middleware should also work with data of any
                  volume. In other words, the purpose of the middleware is to
                  provide facilities that adapt distributed computing system for
                  a given workload. In this paper we introduce such middleware
                  appliance. Tests show that this middleware is well-suited for
                  typical HPC and big data workloads and its performance is
                  comparable with well-known alternatives.},
  publisher = {Pleiades Publishing},
  address = {Moscow, Russia},
  issn = {1531-8567},
  doi = {10.1134/S1547477117070068},
  language = {english},
  tags = {scopus,wos},
  nrefs = {24}
}
@inbook{gankevich2018ocean,
  author = {Gankevich, Ivan and Degtyarev, Alexander},
  editor = {Velarde, Manuel G. and Tarakanov, Roman Yu. and Marchenko,
                  Alexey V.},
  title = {Simulation of Standing and Propagating Sea Waves with
                  Three-Dimensional {ARMA} Model},
  booktitle = {The Ocean in Motion: Circulation, Waves, Polar Oceanography},
  year = {2018},
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {249--278},
  abstract = {Simulation of sea waves is a problem appearing in the
                  framework of developing software-based ship motion modelling
                  applications. These applications generally use linear wave
                  theory to generate small-amplitude waves programmatically and
                  determine impact of external excitations on a ship hull. Using
                  linear wave theory is feasible for ocean waves, but is not
                  accurate for shallow-water and storm waves. To cope with these
                  shortcomings we introduce autoregressive moving-average (ARMA)
                  model, which is widely known in oceanography, but rarely used
                  for sea wave modelling. The new model allows to generate waves
                  of arbitrary amplitudes, is accurate for both shallow and deep
                  water, and its software implementation shows superior
                  performance by relying on fast Fourier transform family of
                  algorithms. Integral characteristics of wavy surface produced
                  by ARMA model are verified against the ones of real sea
                  surface. Despite all its advantages, ARMA model requires a new
                  method to determine wave pressures, an instance of which is
                  included in the chapter.},
  isbn = {978-3-319-71934-4},
  doi = {10.1007/978-3-319-71934-4_18},
  language = {english}
}

This file was generated by bibtex2html 1.97.