#include <ComputePmeCUDAMgr.h>
Classes | |
struct | ExtraDevice |
struct | IJ |
struct | NodeDevice |
Public Member Functions | |
ComputePmeCUDAMgr () | |
ComputePmeCUDAMgr (CkMigrateMessage *) | |
~ComputePmeCUDAMgr () | |
void | setupPencils () |
void | initialize (CkQdMsg *msg) |
void | initialize_pencils (CkQdMsg *msg) |
void | activate_pencils (CkQdMsg *msg) |
PmeGrid | getPmeGrid () |
int | getNode (int i, int j) |
int | getDevice (int i, int j) |
int | getDevicePencilY (int i, int j) |
int | getDevicePencilZ (int i, int j) |
int | getDeviceIDPencilX (int i, int j) |
int | getDeviceIDPencilY (int i, int j) |
int | getDeviceIDPencilZ (int i, int j) |
void | recvPencils (CProxy_CudaPmePencilXYZ xyz) |
void | recvPencils (CProxy_CudaPmePencilXY xy, CProxy_CudaPmePencilZ z) |
void | recvPencils (CProxy_CudaPmePencilX x, CProxy_CudaPmePencilY y, CProxy_CudaPmePencilZ z) |
void | createDevicesAndAtomFiler () |
void | recvDevices (RecvDeviceMsg *msg) |
void | recvAtomFiler (CProxy_PmeAtomFiler filer) |
void | skip () |
void | recvAtoms (PmeAtomMsg *msg) |
void | getHomePencil (PatchID patchID, int &homey, int &homez) |
int | getHomeNode (PatchID patchID) |
bool | isPmePe (int pe) |
bool | isPmeNode (int node) |
bool | isPmeDevice (int deviceID) |
Static Public Member Functions | |
static ComputePmeCUDAMgr * | Object () |
Public Attributes | |
ComputePmeCUDAMgr_SDAG_CODE |
Definition at line 430 of file ComputePmeCUDAMgr.h.
ComputePmeCUDAMgr::ComputePmeCUDAMgr | ( | ) |
Definition at line 204 of file ComputePmeCUDAMgr.C.
ComputePmeCUDAMgr::ComputePmeCUDAMgr | ( | CkMigrateMessage * | ) |
Definition at line 215 of file ComputePmeCUDAMgr.C.
References NAMD_bug().
00215 { 00216 __sdag_init(); 00217 NAMD_bug("ComputePmeCUDAMgr cannot be migrated"); 00218 numDevices = 0; 00219 numTotalPatches = 0; 00220 numNodesContributed = 0; 00221 numDevicesMax = 0; 00222 }
ComputePmeCUDAMgr::~ComputePmeCUDAMgr | ( | ) |
void ComputePmeCUDAMgr::activate_pencils | ( | CkQdMsg * | msg | ) |
Definition at line 809 of file ComputePmeCUDAMgr.C.
References PmeStartMsg::data, PmeStartMsg::dataSize, and j.
00809 { 00810 if (msg != NULL) delete msg; 00811 00812 for (int device=0;device < numDevices;device++) { 00813 deviceProxy[device].ckLocalBranch()->activate_pencils(); 00814 } 00815 00816 for (int i=0;i < ijPencilY.size();i++) { 00817 PmeStartMsg* pmeStartYMsg = new PmeStartMsg(); 00818 pmeStartYMsg->data = NULL; 00819 pmeStartYMsg->dataSize = 0; 00820 pmePencilY(ijPencilY[i].i, 0, ijPencilY[i].j).start(pmeStartYMsg); 00821 } 00822 00823 for (int i=0;i < ijPencilZ.size();i++) { 00824 PmeStartMsg* pmeStartZMsg = new PmeStartMsg(); 00825 pmeStartZMsg->data = NULL; 00826 pmeStartZMsg->dataSize = 0; 00827 pmePencilZ(ijPencilZ[i].i, ijPencilZ[i].j, 0).start(pmeStartZMsg); 00828 } 00829 00830 }
void ComputePmeCUDAMgr::createDevicesAndAtomFiler | ( | ) |
Definition at line 679 of file ComputePmeCUDAMgr.C.
References RecvDeviceMsg::dev, NAMD_bug(), RecvDeviceMsg::numDevicesMax, and PRIORITY_SIZE.
00679 { 00680 if (CkMyNode() != 0) 00681 NAMD_bug("ComputePmeCUDAMgr::createDevicesAndAtomFiler can only be called on root node"); 00682 00683 // Root node creates all device proxies 00684 // NOTE: Only root node has numDevicesMax 00685 RecvDeviceMsg* msg = new (numDevicesMax, PRIORITY_SIZE) RecvDeviceMsg(); 00686 msg->numDevicesMax = numDevicesMax; 00687 for (int i=0;i < numDevicesMax;i++) { 00688 CProxy_ComputePmeCUDADevice dev = CProxy_ComputePmeCUDADevice::ckNew(); 00689 memcpy(&msg->dev[i], &dev, sizeof(CProxy_ComputePmeCUDADevice)); 00690 } 00691 thisProxy.recvDevices(msg); 00692 00693 CProxy_PmeAtomFiler filer = CProxy_PmeAtomFiler::ckNew(); 00694 thisProxy.recvAtomFiler(filer); 00695 00696 }
int ComputePmeCUDAMgr::getDevice | ( | int | i, | |
int | j | |||
) |
Definition at line 854 of file ComputePmeCUDAMgr.C.
References NAMD_bug(), PmeGrid::yBlocks, and PmeGrid::zBlocks.
Referenced by getDeviceIDPencilX(), and recvAtoms().
00854 { 00855 if (i < 0 || i >= pmeGrid.yBlocks || j < 0 || j >= pmeGrid.zBlocks) 00856 NAMD_bug("ComputePmeCUDAMgr::getDevice, pencil index out of bounds"); 00857 int ind = i + j*pmeGrid.yBlocks; 00858 int device = nodeDeviceList[ind].device; 00859 if (device == -1) 00860 NAMD_bug("ComputePmeCUDAMgr::getDevice, no device found"); 00861 return device; 00862 }
int ComputePmeCUDAMgr::getDeviceIDPencilX | ( | int | i, | |
int | j | |||
) |
Definition at line 895 of file ComputePmeCUDAMgr.C.
References getDevice().
int ComputePmeCUDAMgr::getDeviceIDPencilY | ( | int | i, | |
int | j | |||
) |
Definition at line 903 of file ComputePmeCUDAMgr.C.
References getDevicePencilY().
00903 { 00904 int device = getDevicePencilY(i, j); 00905 return deviceProxy[device].ckLocalBranch()->getDeviceID(); 00906 }
int ComputePmeCUDAMgr::getDeviceIDPencilZ | ( | int | i, | |
int | j | |||
) |
Definition at line 911 of file ComputePmeCUDAMgr.C.
References getDevicePencilZ().
00911 { 00912 int device = getDevicePencilZ(i, j); 00913 return deviceProxy[device].ckLocalBranch()->getDeviceID(); 00914 }
int ComputePmeCUDAMgr::getDevicePencilY | ( | int | i, | |
int | j | |||
) |
Definition at line 867 of file ComputePmeCUDAMgr.C.
References NAMD_bug(), PmeGrid::xBlocks, and PmeGrid::zBlocks.
Referenced by getDeviceIDPencilY().
00867 { 00868 if (i < 0 || i >= pmeGrid.xBlocks || j < 0 || j >= pmeGrid.zBlocks) 00869 NAMD_bug("ComputePmeCUDAMgr::getDevicePencilY, pencil index out of bounds"); 00870 for (int device=0;device < ijPencilY.size();device++) { 00871 if (ijPencilY[device].i == i && ijPencilY[device].j == j) return device; 00872 } 00873 char str[256]; 00874 sprintf(str, "ComputePmeCUDAMgr::getDevicePencilY, no device found at i %d j %d",i,j); 00875 NAMD_bug(str); 00876 return -1; 00877 }
int ComputePmeCUDAMgr::getDevicePencilZ | ( | int | i, | |
int | j | |||
) |
Definition at line 882 of file ComputePmeCUDAMgr.C.
References NAMD_bug(), PmeGrid::xBlocks, and PmeGrid::yBlocks.
Referenced by getDeviceIDPencilZ().
00882 { 00883 if (i < 0 || i >= pmeGrid.xBlocks || j < 0 || j >= pmeGrid.yBlocks) 00884 NAMD_bug("ComputePmeCUDAMgr::getDevicePencilZ, pencil index out of bounds"); 00885 for (int device=0;device < ijPencilZ.size();device++) { 00886 if (ijPencilZ[device].i == i && ijPencilZ[device].j == j) return device; 00887 } 00888 NAMD_bug("ComputePmeCUDAMgr::getDevicePencilZ, no device found"); 00889 return -1; 00890 }
int ComputePmeCUDAMgr::getHomeNode | ( | PatchID | patchID | ) |
Definition at line 845 of file ComputePmeCUDAMgr.C.
References getHomePencil(), and getNode().
00845 { 00846 int homey, homez; 00847 getHomePencil(patchID, homey, homez); 00848 return getNode(homey, homez); 00849 }
void ComputePmeCUDAMgr::getHomePencil | ( | PatchID | patchID, | |
int & | homey, | |||
int & | homez | |||
) |
Definition at line 238 of file ComputePmeCUDAMgr.C.
References getPencilDim(), PmeGrid::K2, PmeGrid::K3, PatchMap::max_b(), PatchMap::max_c(), PatchMap::min_b(), PatchMap::min_c(), NAMD_bug(), Object(), PmeGrid::order, Perm_X_Y_Z, PmeGrid::yBlocks, and PmeGrid::zBlocks.
Referenced by getHomeNode().
00238 { 00239 PatchMap *patchMap = PatchMap::Object(); 00240 00241 BigReal miny = patchMap->min_b(patchID); 00242 BigReal maxy = patchMap->max_b(patchID); 00243 00244 BigReal minz = patchMap->min_c(patchID); 00245 BigReal maxz = patchMap->max_c(patchID); 00246 00247 // Determine home pencil = pencil with most overlap 00248 00249 // Calculate patch grid coordinates 00250 int patch_y0 = floor((miny+0.5)*pmeGrid.K2); 00251 int patch_y1 = floor((maxy+0.5)*pmeGrid.K2)-1; 00252 int patch_z0 = floor((minz+0.5)*pmeGrid.K3); 00253 int patch_z1 = floor((maxz+0.5)*pmeGrid.K3)-1; 00254 00255 if (patch_y0 < 0 || patch_y1 >= pmeGrid.K2 || patch_z0 < 0 || patch_z1 >= pmeGrid.K3) { 00256 NAMD_bug("ComputePmeCUDAMgr::getHomePencil, patch bounds are outside grid bounds"); 00257 } 00258 00259 int maxOverlap = 0; 00260 homey = -1; 00261 homez = -1; 00262 for (int iz=0;iz < pmeGrid.zBlocks;iz++) { 00263 for (int iy=0;iy < pmeGrid.yBlocks;iy++) { 00264 int pencil_x0, pencil_x1, pencil_y0, pencil_y1, pencil_z0, pencil_z1; 00265 getPencilDim(pmeGrid, Perm_X_Y_Z, iy, iz, 00266 pencil_x0, pencil_x1, pencil_y0, pencil_y1, pencil_z0, pencil_z1); 00267 00268 if (pencil_y1 - pencil_y0 < pmeGrid.order || pencil_z1 - pencil_z0 < pmeGrid.order) 00269 NAMD_bug("ComputePmeCUDAMgr::getHomePencil, pencil size must be >= PMEInterpOrder"); 00270 00271 int y0 = std::max(patch_y0, pencil_y0); 00272 int y1 = std::min(patch_y1, pencil_y1); 00273 int z0 = std::max(patch_z0, pencil_z0); 00274 int z1 = std::min(patch_z1, pencil_z1); 00275 00276 int overlap = (y1-y0 > 0 && z1-z0 > 0) ? (y1-y0)*(z1-z0) : -1; 00277 00278 if (overlap > maxOverlap) { 00279 maxOverlap = overlap; 00280 homey = iy; 00281 homez = iz; 00282 } 00283 } 00284 } 00285 00286 if (homey == -1 || homez == -1) 00287 NAMD_bug("ComputePmeCUDAMgr::getHomePencil, home pencil not found"); 00288 }
int ComputePmeCUDAMgr::getNode | ( | int | i, | |
int | j | |||
) |
Definition at line 835 of file ComputePmeCUDAMgr.C.
References NAMD_bug(), PmeGrid::yBlocks, and PmeGrid::zBlocks.
Referenced by getHomeNode().
PmeGrid ComputePmeCUDAMgr::getPmeGrid | ( | ) | [inline] |
Definition at line 440 of file ComputePmeCUDAMgr.h.
Referenced by ComputePmeCUDA::initialize().
void ComputePmeCUDAMgr::initialize | ( | CkQdMsg * | msg | ) |
Definition at line 598 of file ComputePmeCUDAMgr.C.
References endi(), iINFO(), iout, setupPencils(), PmeGrid::xBlocks, PmeGrid::yBlocks, and PmeGrid::zBlocks.
00598 { 00599 if (msg != NULL) delete msg; 00600 00601 setupPencils(); 00602 00603 if ( ! CkMyNode() ) { 00604 iout << iINFO << "PME using " << pmeGrid.xBlocks << " x " << 00605 pmeGrid.yBlocks << " x " << pmeGrid.zBlocks << 00606 " pencil grid for FFT and reciprocal sum.\n" << endi; 00607 } 00608 00609 // Initialize list that contains the number of home patches for each device on this manager 00610 numHomePatchesList.resize(numDevices, 0); 00611 00612 //-------------------------------------------------------------------------- 00613 // Create devices and atom filer 00614 // numDevices = number of devices we'll be using, possibly different on each node 00615 // Use root node to compute the maximum number of devices in use over all nodes 00616 thisProxy[0].initializeDevicesAndAtomFiler(new NumDevicesMsg(numDevices)); 00617 //-------------------------------------------------------------------------- 00618 00619 if (CkMyNode() == 0) { 00620 00621 if (pmePencilType == 3) { 00622 // Single block => 3D FFT 00623 CProxy_PmePencilXYZMap xyzMap = CProxy_PmePencilXYZMap::ckNew(xPes[0]); 00624 CkArrayOptions xyzOpts(1); 00625 xyzOpts.setMap(xyzMap); 00626 xyzOpts.setAnytimeMigration(false); 00627 xyzOpts.setStaticInsertion(true); 00628 pmePencilXYZ = CProxy_CudaPmePencilXYZ::ckNew(xyzOpts); 00629 pmePencilXYZ[0].initialize(new CudaPmeXYZInitMsg(pmeGrid)); 00630 thisProxy.recvPencils(pmePencilXYZ); 00631 } else if (pmePencilType == 2) { 00632 // Blocks in all but y-dimension => 2D FFT 00633 CProxy_PmePencilXYMap xyMap = CProxy_PmePencilXYMap::ckNew(xPes); 00634 CProxy_PmePencilXMap zMap = CProxy_PmePencilXMap::ckNew(0, 1, pmeGrid.xBlocks, zPes); 00635 CkArrayOptions xyOpts(1, 1, pmeGrid.zBlocks); 00636 CkArrayOptions zOpts(pmeGrid.xBlocks, 1, 1); 00637 xyOpts.setMap(xyMap); 00638 zOpts.setMap(zMap); 00639 xyOpts.setAnytimeMigration(false); 00640 zOpts.setAnytimeMigration(false); 00641 xyOpts.setStaticInsertion(true); 00642 zOpts.setStaticInsertion(true); 00643 pmePencilXY = CProxy_CudaPmePencilXY::ckNew(xyOpts); 00644 pmePencilZ = CProxy_CudaPmePencilZ::ckNew(zOpts); 00645 // Send pencil proxies to other nodes 00646 thisProxy.recvPencils(pmePencilXY, pmePencilZ); 00647 pmePencilXY.initialize(new CudaPmeXYInitMsg(pmeGrid, pmePencilXY, pmePencilZ, xyMap, zMap)); 00648 pmePencilZ.initialize(new CudaPmeXYInitMsg(pmeGrid, pmePencilXY, pmePencilZ, xyMap, zMap)); 00649 } else { 00650 // Blocks in all dimensions => 1D FFT 00651 CProxy_PmePencilXMap xMap = CProxy_PmePencilXMap::ckNew(1, 2, pmeGrid.yBlocks, xPes); 00652 CProxy_PmePencilXMap yMap = CProxy_PmePencilXMap::ckNew(0, 2, pmeGrid.xBlocks, yPes); 00653 CProxy_PmePencilXMap zMap = CProxy_PmePencilXMap::ckNew(0, 1, pmeGrid.xBlocks, zPes); 00654 CkArrayOptions xOpts(1, pmeGrid.yBlocks, pmeGrid.zBlocks); 00655 CkArrayOptions yOpts(pmeGrid.xBlocks, 1, pmeGrid.zBlocks); 00656 CkArrayOptions zOpts(pmeGrid.xBlocks, pmeGrid.yBlocks, 1); 00657 xOpts.setMap(xMap); 00658 yOpts.setMap(yMap); 00659 zOpts.setMap(zMap); 00660 xOpts.setAnytimeMigration(false); 00661 yOpts.setAnytimeMigration(false); 00662 zOpts.setAnytimeMigration(false); 00663 xOpts.setStaticInsertion(true); 00664 yOpts.setStaticInsertion(true); 00665 zOpts.setStaticInsertion(true); 00666 pmePencilX = CProxy_CudaPmePencilX::ckNew(xOpts); 00667 pmePencilY = CProxy_CudaPmePencilY::ckNew(yOpts); 00668 pmePencilZ = CProxy_CudaPmePencilZ::ckNew(zOpts); 00669 // Send pencil proxies to other nodes 00670 thisProxy.recvPencils(pmePencilX, pmePencilY, pmePencilZ); 00671 pmePencilX.initialize(new CudaPmeXInitMsg(pmeGrid, pmePencilX, pmePencilY, pmePencilZ, xMap, yMap, zMap)); 00672 pmePencilY.initialize(new CudaPmeXInitMsg(pmeGrid, pmePencilX, pmePencilY, pmePencilZ, xMap, yMap, zMap)); 00673 pmePencilZ.initialize(new CudaPmeXInitMsg(pmeGrid, pmePencilX, pmePencilY, pmePencilZ, xMap, yMap, zMap)); 00674 } 00675 } 00676 00677 }
void ComputePmeCUDAMgr::initialize_pencils | ( | CkQdMsg * | msg | ) |
Definition at line 732 of file ComputePmeCUDAMgr.C.
References createStream(), cudaCheck, deviceCUDA, DeviceCUDA::getDeviceIDbyRank(), DeviceCUDA::getNumDevice(), j, and stream.
00732 { 00733 if (msg != NULL) delete msg; 00734 00735 int numDevicesTmp = deviceCUDA->getNumDevice(); 00736 00737 // Initialize device proxies for real-space interfacing 00738 for (int i=0;i < ijPencilX.size();i++) { 00739 // NOTE: i is here the device ID 00740 int deviceID = deviceCUDA->getDeviceIDbyRank(i % numDevicesTmp); 00741 deviceProxy[i].ckLocalBranch()->initialize(pmeGrid, ijPencilX[i].i, ijPencilX[i].j, 00742 deviceID, pmePencilType, thisProxy, pmeAtomFiler); 00743 if (pmePencilType == 1) { 00744 deviceProxy[i].ckLocalBranch()->setPencilProxy(pmePencilX); 00745 } else if (pmePencilType == 2) { 00746 deviceProxy[i].ckLocalBranch()->setPencilProxy(pmePencilXY); 00747 } else { 00748 deviceProxy[i].ckLocalBranch()->setPencilProxy(pmePencilXYZ); 00749 } 00750 } 00751 00752 // Use above initialized device proxies for the PME pencils that interface with real-space 00753 for (int i=0;i < ijPencilX.size();i++) { 00754 if (pmePencilType == 1) { 00755 pmePencilX(0, ijPencilX[i].i, ijPencilX[i].j).initializeDevice(new InitDeviceMsg(deviceProxy[i])); 00756 } else if (pmePencilType == 2) { 00757 pmePencilXY(0, 0, ijPencilX[i].j).initializeDevice(new InitDeviceMsg(deviceProxy[i])); 00758 } else { 00759 pmePencilXYZ[0].initializeDevice(new InitDeviceMsg(deviceProxy[i])); 00760 } 00761 } 00762 00763 // Create extra devices for Y and Z pencils if necessary 00764 int n = std::max(ijPencilY.size(), ijPencilZ.size()); 00765 if (n > ijPencilX.size()) { 00766 int nextra = n - ijPencilX.size(); 00767 extraDevices.resize(nextra); 00768 for (int i=0;i < nextra;i++) { 00769 extraDevices[i].deviceID = deviceCUDA->getDeviceIDbyRank((i + ijPencilX.size()) % numDevicesTmp); 00770 cudaCheck(cudaSetDevice(extraDevices[i].deviceID)); 00771 createStream(extraDevices[i].stream); 00772 } 00773 } 00774 00775 // Initialize Y pencils 00776 for (int i=0;i < ijPencilY.size();i++) { 00777 int deviceID; 00778 cudaStream_t stream; 00779 if (i < ijPencilX.size()) { 00780 deviceID = deviceProxy[i].ckLocalBranch()->getDeviceID(); 00781 stream = deviceProxy[i].ckLocalBranch()->getStream(); 00782 } else { 00783 deviceID = extraDevices[i-ijPencilX.size()].deviceID; 00784 stream = extraDevices[i-ijPencilX.size()].stream; 00785 } 00786 pmePencilY(ijPencilY[i].i, 0, ijPencilY[i].j).initializeDevice(new InitDeviceMsg2(deviceID, stream, thisProxy)); 00787 } 00788 00789 // Initialize Z pencils 00790 for (int i=0;i < ijPencilZ.size();i++) { 00791 int deviceID; 00792 cudaStream_t stream; 00793 if (i < ijPencilX.size()) { 00794 deviceID = deviceProxy[i].ckLocalBranch()->getDeviceID(); 00795 stream = deviceProxy[i].ckLocalBranch()->getStream(); 00796 } else { 00797 deviceID = extraDevices[i-ijPencilX.size()].deviceID; 00798 stream = extraDevices[i-ijPencilX.size()].stream; 00799 } 00800 pmePencilZ(ijPencilZ[i].i, ijPencilZ[i].j, 0).initializeDevice(new InitDeviceMsg2(deviceID, stream, thisProxy)); 00801 } 00802 00803 }
bool ComputePmeCUDAMgr::isPmeDevice | ( | int | deviceID | ) |
Definition at line 585 of file ComputePmeCUDAMgr.C.
References deviceCUDA, DeviceCUDA::getDeviceIDbyRank(), and DeviceCUDA::getNumDevice().
00585 { 00586 for (int i=0;i < nodeDeviceList.size();i++) { 00587 if (deviceCUDA->getDeviceIDbyRank(nodeDeviceList[i].device % deviceCUDA->getNumDevice()) == deviceID) { 00588 return true; 00589 } 00590 } 00591 return false; 00592 }
bool ComputePmeCUDAMgr::isPmeNode | ( | int | node | ) |
Definition at line 573 of file ComputePmeCUDAMgr.C.
bool ComputePmeCUDAMgr::isPmePe | ( | int | pe | ) |
Definition at line 563 of file ComputePmeCUDAMgr.C.
Referenced by CudaComputeNonbonded::assignPatches().
static ComputePmeCUDAMgr* ComputePmeCUDAMgr::Object | ( | ) | [inline, static] |
Definition at line 464 of file ComputePmeCUDAMgr.h.
Referenced by CudaComputeNonbonded::assignPatches(), getHomePencil(), and setupPencils().
void ComputePmeCUDAMgr::recvAtomFiler | ( | CProxy_PmeAtomFiler | filer | ) |
Definition at line 698 of file ComputePmeCUDAMgr.C.
void ComputePmeCUDAMgr::recvAtoms | ( | PmeAtomMsg * | msg | ) |
Definition at line 933 of file ComputePmeCUDAMgr.C.
References getDevice(), PmeAtomMsg::i, and PmeAtomMsg::j.
void ComputePmeCUDAMgr::recvDevices | ( | RecvDeviceMsg * | msg | ) |
Definition at line 702 of file ComputePmeCUDAMgr.C.
References RecvDeviceMsg::dev, NAMD_bug(), and RecvDeviceMsg::numDevicesMax.
00702 { 00703 numDevicesMax = msg->numDevicesMax; 00704 if (numDevices > numDevicesMax) 00705 NAMD_bug("ComputePmeCUDAMgr::recvDevices, numDevices > numDevicesMax"); 00706 deviceProxy.resize(numDevices); 00707 for (int i=0;i < numDevices;i++) { 00708 deviceProxy[i] = msg->dev[i]; 00709 } 00710 delete msg; 00711 }
void ComputePmeCUDAMgr::recvPencils | ( | CProxy_CudaPmePencilX | x, | |
CProxy_CudaPmePencilY | y, | |||
CProxy_CudaPmePencilZ | z | |||
) |
Definition at line 722 of file ComputePmeCUDAMgr.C.
void ComputePmeCUDAMgr::recvPencils | ( | CProxy_CudaPmePencilXY | xy, | |
CProxy_CudaPmePencilZ | z | |||
) |
Definition at line 717 of file ComputePmeCUDAMgr.C.
00717 { 00718 pmePencilXY = xy; 00719 pmePencilZ = z; 00720 }
void ComputePmeCUDAMgr::recvPencils | ( | CProxy_CudaPmePencilXYZ | xyz | ) |
Definition at line 713 of file ComputePmeCUDAMgr.C.
void ComputePmeCUDAMgr::setupPencils | ( | ) |
Definition at line 400 of file ComputePmeCUDAMgr.C.
References PmeGrid::block1, PmeGrid::block2, PmeGrid::block3, deviceCUDA, PmeGrid::dim2, PmeGrid::dim3, DeviceCUDA::getNumDevice(), if(), PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_bug(), Object(), PmeGrid::order, SimParameters::PMEGridSizeX, SimParameters::PMEGridSizeY, SimParameters::PMEGridSizeZ, SimParameters::PMEInterpOrder, SimParameters::PMEPencilsX, SimParameters::PMEPencilsY, SimParameters::PMEPencilsZ, PmeGrid::xBlocks, PmeGrid::yBlocks, and PmeGrid::zBlocks.
Referenced by initialize().
00400 { 00401 SimParameters *simParams = Node::Object()->simParameters; 00402 00403 pmeGrid.K1 = simParams->PMEGridSizeX; 00404 pmeGrid.K2 = simParams->PMEGridSizeY; 00405 pmeGrid.K3 = simParams->PMEGridSizeZ; 00406 pmeGrid.order = simParams->PMEInterpOrder; 00407 pmeGrid.dim2 = pmeGrid.K2; 00408 pmeGrid.dim3 = 2 * (pmeGrid.K3/2 + 1); 00409 00410 // Count the total number of devices assuming all nodes have the same number as this node 00411 // NOTE: This should be changed in the future to support heterogeneous nodes!!! 00412 int numDevicesTmp = deviceCUDA->getNumDevice(); 00413 00414 int numDeviceTot = CkNumNodes() * numDevicesTmp; 00415 // Use approximately 1/4th of the devices for PME 00416 int numDeviceToUse = std::max(1, numDeviceTot/4); 00417 00418 if (numDeviceToUse < 4) { 00419 // 2D Slab 00420 pmeGrid.yBlocks = 1; 00421 pmeGrid.xBlocks = pmeGrid.zBlocks = numDeviceToUse; 00422 } else { 00423 // 1D Pencil 00424 pmeGrid.yBlocks = (int)sqrt((double)numDeviceToUse); 00425 pmeGrid.zBlocks = numDeviceToUse/pmeGrid.yBlocks; 00426 pmeGrid.xBlocks = pmeGrid.zBlocks; 00427 } 00428 00429 if ( simParams->PMEPencilsX > 0 ) pmeGrid.xBlocks = simParams->PMEPencilsX; 00430 if ( simParams->PMEPencilsY > 0 ) pmeGrid.yBlocks = simParams->PMEPencilsY; 00431 if ( simParams->PMEPencilsZ > 0 ) pmeGrid.zBlocks = simParams->PMEPencilsZ; 00432 00433 // Restrict number of pencils to the maximum number 00434 restrictToMaxPMEPencils(); 00435 00436 // Fix pencil numbers if they don't make sense w.r.t. number of devices 00437 if (pmeGrid.yBlocks == 1) { 00438 // 2D Slab 00439 if (pmeGrid.xBlocks > numDeviceTot) pmeGrid.xBlocks = numDeviceTot; 00440 if (pmeGrid.zBlocks > numDeviceTot) pmeGrid.zBlocks = numDeviceTot; 00441 } else { 00442 // 1D Pencil 00443 if (pmeGrid.yBlocks*pmeGrid.zBlocks > numDeviceTot || 00444 pmeGrid.xBlocks*pmeGrid.zBlocks > numDeviceTot || 00445 pmeGrid.xBlocks*pmeGrid.yBlocks > numDeviceTot) { 00446 pmeGrid.yBlocks = std::min(pmeGrid.yBlocks, (int)sqrt((double)numDeviceTot)); 00447 pmeGrid.zBlocks = std::min(pmeGrid.zBlocks, numDeviceTot/pmeGrid.yBlocks); 00448 } 00449 pmeGrid.xBlocks = std::min(pmeGrid.yBlocks, pmeGrid.zBlocks); 00450 } 00451 00452 // Here (block1, block2, block3) define the size of charge grid pencil in each direction 00453 pmeGrid.block1 = ( pmeGrid.K1 + pmeGrid.xBlocks - 1 ) / pmeGrid.xBlocks; 00454 pmeGrid.block2 = ( pmeGrid.K2 + pmeGrid.yBlocks - 1 ) / pmeGrid.yBlocks; 00455 pmeGrid.block3 = ( pmeGrid.K3 + pmeGrid.zBlocks - 1 ) / pmeGrid.zBlocks; 00456 00457 // Determine type of FFT 00458 if (pmeGrid.xBlocks == 1 && pmeGrid.yBlocks == 1 && pmeGrid.zBlocks == 1) { 00459 // Single block => 3D FFT 00460 pmePencilType = 3; 00461 } else if (pmeGrid.yBlocks == 1) { 00462 // Blocks in all but y-dimension => 2D FFT 00463 pmePencilType = 2; 00464 } else { 00465 // Blocks in all dimensions => 1D FFT 00466 pmePencilType = 1; 00467 } 00468 00469 //-------------------------------------------------------------------------- 00470 // Map pencils into Pes 00471 xPes.resize(pmeGrid.yBlocks*pmeGrid.zBlocks); 00472 00473 if (pmePencilType == 1 || pmePencilType == 2) { 00474 zPes.resize(pmeGrid.xBlocks*pmeGrid.yBlocks); 00475 } 00476 if (pmePencilType == 1) { 00477 yPes.resize(pmeGrid.xBlocks*pmeGrid.zBlocks); 00478 } 00479 00480 // i % numDeviceTot = device index 00481 // (i % numDeviceTot)/deviceCUDA->getNumDevice() = node index 00482 // (i % CkNodeSize(node)) = pe displacement 00483 for (int i=0;i < xPes.size();i++) { 00484 int node = (i % numDeviceTot)/numDevicesTmp; 00485 xPes[i] = CkNodeFirst(node) + (i + 0) % CkNodeSize(node); 00486 } 00487 for (int i=0;i < yPes.size();i++) { 00488 int node = (i % numDeviceTot)/numDevicesTmp; 00489 yPes[i] = CkNodeFirst(node) + (i + 0) % CkNodeSize(node); 00490 } 00491 for (int i=0;i < zPes.size();i++) { 00492 int node = (i % numDeviceTot)/numDevicesTmp; 00493 zPes[i] = CkNodeFirst(node) + (i + 0) % CkNodeSize(node); 00494 } 00495 00496 // char peStr[256]; 00497 // char *p = peStr; 00498 // p += sprintf(p, "%2d | xPes", CkMyPe()); 00499 // for (int i=0;i < xPes.size();i++) 00500 // p += sprintf(p, " %d", xPes[i]); 00501 // p += sprintf(p, " yPes"); 00502 // for (int i=0;i < yPes.size();i++) 00503 // p += sprintf(p, " %d", yPes[i]); 00504 // p += sprintf(p, " zPes"); 00505 // for (int i=0;i < zPes.size();i++) 00506 // p += sprintf(p, " %d", zPes[i]); 00507 // fprintf(stderr, "%s | %d %d\n",peStr, CkNodeFirst(CkMyNode()), CkNodeSize(CkMyNode())); 00508 00509 //-------------------------------------------------------------------------- 00510 // Build global node list for x-pencils 00511 nodeDeviceList.resize(xPes.size()); 00512 numDevices = 0; 00513 for (int k=0;k < xPes.size();k++) { 00514 nodeDeviceList[k].node = CkNodeOf(xPes[k]); 00515 nodeDeviceList[k].device = -1; 00516 if (nodeDeviceList[k].node == CkMyNode()) { 00517 nodeDeviceList[k].device = numDevices++; 00518 } 00519 } 00520 00521 ijPencilX.clear(); 00522 ijPencilY.clear(); 00523 ijPencilZ.clear(); 00524 00525 // Construct list of pencil coordinates (i,j) for each device held by this node 00526 for (int k=0;k < xPes.size();k++) { 00527 if (CkMyNode() == CkNodeOf(xPes[k])) { 00528 IJ ij; 00529 ij.i = k % pmeGrid.yBlocks; 00530 ij.j = k / pmeGrid.yBlocks; 00531 ijPencilX.push_back(ij); 00532 } 00533 } 00534 if (ijPencilX.size() != numDevices) 00535 NAMD_bug("ComputePmeCUDAMgr::setupPencils, error setting up x-pencils and devices"); 00536 00537 int numDevicesY = 0; 00538 for (int k=0;k < yPes.size();k++) { 00539 if (CkMyNode() == CkNodeOf(yPes[k])) { 00540 IJ ij; 00541 ij.i = k % pmeGrid.xBlocks; 00542 ij.j = k / pmeGrid.xBlocks; 00543 ijPencilY.push_back(ij); 00544 numDevicesY++; 00545 } 00546 } 00547 00548 int numDevicesZ = 0; 00549 for (int k=0;k < zPes.size();k++) { 00550 if (CkMyNode() == CkNodeOf(zPes[k])) { 00551 IJ ij; 00552 ij.i = k % pmeGrid.xBlocks; 00553 ij.j = k / pmeGrid.xBlocks; 00554 ijPencilZ.push_back(ij); 00555 numDevicesZ++; 00556 } 00557 } 00558 }
void ComputePmeCUDAMgr::skip | ( | void | ) |
Definition at line 919 of file ComputePmeCUDAMgr.C.
Definition at line 432 of file ComputePmeCUDAMgr.h.