@comment{
  Cleaned-up repository export for the technical report on FEM on the Cenju machine.
  - author: the export listed every author twice (Japanese and romanized); the
    Japanese forms are kept in "author" to match the Japanese title, and the
    romanized forms are preserved in the ignored field "author-romanized".
  - abstract / number / note: the export had packed the abstract, the material
    number and the report number into a single "note" field.
  - institution: inferred from the "NAL SP-16" report number (NAL is presumably
    the National Aerospace Laboratory of Japan) -- TODO confirm against the
    repository record.
}
@techreport{oai:jaxa.repo.nii.ac.jp:00043510,
 author           = {中田, 登志之 and 加納, 健 and 小池, 誠彦 and 奥村, 秀人 and 大竹, 邦彦 and 中村, 孝 and 福田, 正大},
 author-romanized = {Nakata, Toshiyuki and Kanoh, Yasushi and Koike, Nobuhiko and Okumura, Hidehito and Ootake, Kunihiko and Nakamura, Takashi and Fukuda, Masahiro},
 title            = {並列シミュレーションマシン{Cenju}上の有限要素法},
 institution      = {航空宇宙技術研究所},
 number           = {NAL SP-16},
 year             = {1991},
 month            = dec,
 note             = {資料番号: NALSP0016014},
 abstract         = {This paper describes parallelization of Finite Element Methods (FEM) on the Parallel Simulation Machine Cenju. One of the most time consuming problems in FEM which we decided to investigate is nonlinear dynamic finite element analysis. The main loop of the nonlinear dynamic finite element analysis is the loop based on Newton-Raphson method which is composed of the following two stages, namely: 1. Calculation of the Stiffness Matrix 2. Solution of a set of linear equations. We decided to tackle the above two problems independently. Generation of the Stiffness Matrix consists of: Process 1 Per Element Calculation in which the element stiffness matrix as well as the reactance force is calculated for each element. Process 2 Accumulative Calculation in which the element stiffness matrices are combined to form the global stiffness matrix. It is quite straightforward to parallelize Process 1, just allocate a group of elements to processors and let them calculate the values for the elements. On the other hand, Process 2 is not so easy to parallelize. We tried two strategies to parallelize Process 2. By eliminating the serial bottleneck, we were able to attain a speed-up of 48 on a 64 processor system. For parallelizing solution of linear equations, we tried two approaches. 1. Parallelizing LU-Factorization. 2. Parallelizing Conjugate Gradient Methods. We first evaluated parallelizing LU-Factorization on a comparatively small set of data. However, due to insufficient amount of parallelism, the speed-up was low---2.9 using 7 processors. So, we decided to tackle parallelizing the conjugate gradient method. The conjugate gradient method we chose is Scaled Conjugate Gradient Method (SCG) which was developed by Hayami for vector computers. For a large scale matrix (8,904 by 8,904) we were able to attain a speed-up of 36 on a 64 processor system.},
}