CNAPS parallel computer system

1  Two ways for running a cnaps-c program

1.1  First way

  1. To create a source code
      % vi myfile.cn
    

  2. To compile it

      % cnc myfile.cn
    
    It produces two files: myfile.x, myfile_INIT_DATA
  3. To prepare a data file

      % vi myfile.ascii--an ascii data file
    
  4. To convert the ascii data file to a binary data file

      % xcv (specify input and output file in the interface) 
    
    You will get a binary data file (assume it is named myfile.input).
  5. To run it

    create a script file

      % vi myfile.codenet
      % cn < myfile.codenet
    
  6. If you want to use xcn, then type

      % xcn
    
    and follow the sequence listed in myfile.codenet file.

1.2  Second way

  1. Edit a source code using cnaps-c language

      % vi demo.cn
    
  2. Call cnaps-c compiler cnc and produce the executable file

    (cnaps-c programming guide, p.4-7)

      % cnc demo.cn -> demo.x  OR
      % cnc -32 demo.cn  OR  (Specify number of PNs)
      % cnc -g demo.cn   OR  (for debugging)
      % cnc -c demo.cn   (Separate compilation)
    

  3. Edit a cnlib program which controls the execution of the *.x

      % vi run_demo.c
    
  4. Compile it (Writing CodeNet Applications in C, p.3-1)
     
      % cc -o outputfile sourcefile -L$ADAPTIVE/lib -lcn -I$ADAPTIVE/include
    

    for example:

      % cc -o bp_pix pixel.c -L$ADAPTIVE/lib -lcn -I$ADAPTIVE/include
    

    This c file can control the execution of the cnaps-c program

2  A matrix multiplication program for the demonstration

This demonstration contains the following programs for matrix × matrix computations.

  1. mat_mat.cn: a program written in cnaps_c language.
  2. mat_mat.codenet: a script file for running mat_mat.cn.
  3. mat_mat.c: a program to run mat_mat.codenet and measure the time spent.
  4. t_connect.c: a program for measuring the connect and disconnect time.
  5. mat1.codenet: a script file for running connectn and disconnect.
  6. mat_seq.c: a program for the sequential version written in C for the comparison with the parallel version.

2.1  mat_mat.cn

/*-------------------------------------------------
** mat_mat.cn
**
** A program that implements a matrix times matrix
** computation in parallel in CNAPS
**
** Distributing [a11, a12, a13] to node 1,  
** distributing [a21, a22, a23] to node 2,  
** distributing [a31, a32, a33] to node 3,  
** Load matrix B as a mono type.  Broadcasting
** each element of matrix B to three nodes one-by-one
** to calculate the elements of C11, C12, C13 in node 1,
** C21, C22, C23 in node 2, and C31, C32, C33 in node 3.
** 
** 12/19/96, Chong-wei Xu
**-------------------------------------------------
*/
#include <cnio.h>

#define NUM_ROW_COL 4  /* matA's ROW = matB's COL */
#define NUM_K 8        /* matA's COL = matB's ROW */

/* Working storage of the matrix_calc domain.  Both members are declared
** poly; going by the file header, each node keeps one row of A and
** builds the matching row of C. */
domain matrix_calc {
  poly int matA_row[NUM_K];      /* one row of matrix A, NUM_K elements */
  poly int sum[NUM_ROW_COL];     /* one row of the result matrix C */
};
/* One domain element per row of A (indexed as mat[r] by the I/O code). */
domain matrix_calc mat[NUM_ROW_COL];
/* Matrix B, NUM_K x NUM_ROW_COL.  Not declared poly; the file header
** describes it as loaded "as a mono type". */
int matB[NUM_K][NUM_ROW_COL];

/* File handles for the two inputs and the result.  These names are the
** ones attached to real files by the bindfile lines of mat_mat.codenet.
** NOTE(review): the meaning of the "*.input"/"*.output" patterns and of
** the two NULL arguments is not visible here -- see the CNAPS-C guide. */
FILE (matAFile, inword, "*.input", NULL, NULL);
FILE (matBFile, inword, "*.input", NULL, NULL);
FILE (resCFile, outword, "*.output", NULL, NULL);

/* Read matrix A from matAFile, word by word in row-major order, storing
** row r in mat[r].matA_row.
**
** NOTE(review): the matA_row parameter is never used in the body; every
** store goes through the global mat[] array instead.
** Presumably getword() reads from the file most recently connect()ed --
** confirm against the cnio documentation. */
void matrix_calc::readMatrixA(int matA_row[])
{
  mono int r, c;

  open(matAFile);
  connect(matAFile);
  /* NUM_ROW_COL rows of NUM_K words each */
  for (r=0; r<NUM_ROW_COL; r++) {
    for (c=0; c<NUM_K; c++)
      mat[r].matA_row[c] = getword();
  }
  close(matAFile);
}

/* Read matrix B from matBFile, word by word in row-major order, into the
** global matB[NUM_K][NUM_ROW_COL].
** Presumably getword() reads from the file most recently connect()ed --
** confirm against the cnio documentation. */
void readMatrixB()
{
  mono int r, c;

  open(matBFile);
  connect(matBFile);
  /* NUM_K rows of NUM_ROW_COL words each */
  for (r=0; r<NUM_K; r++) {
    for (c=0; c<NUM_ROW_COL; c++)
      matB[r][c] = getword();
  }
  close(matBFile);
}

/* Entry point, started by "run mat_mat" in mat_mat.codenet.
** Loads A and B, computes C = A x B and writes C to resCFile. */
entry void mat_mat(void)
{
  mono int r, c;

  /* NOTE(review): presumably this construct runs the enclosed statements
  ** on the nodes of the matrix_calc domain, so that unqualified matA_row
  ** and sum refer to each node's own copy -- confirm in the CNAPS-C guide. */
  [domain matrix_calc]. {
    readMatrixA(matA_row);
    readMatrixB();

    /* sum[r] = (this node's row of A) . (column r of B) */
    for (r=0; r<NUM_ROW_COL; r++) {
      sum[r] = 0;
      for (c=0; c<NUM_K; c++)
        sum[r] += (matA_row[c] * matB[c][r]);
    }

    /* Write C in row-major order: for each row r, the sum[] held in mat[r] */
    open(resCFile);
    connect(resCFile);
    for (r=0; r<NUM_ROW_COL; r++) {
      for (c=0; c<NUM_ROW_COL; c++)
        putword(mat[r].sum[c]);
    }
    close(resCFile);
  }
}

2.2  mat_mat.codenet

/*--------------------------------------------------
** The mat_mat.codenet script file
**--------------------------------------------------
*/
/* Establish a connection and load the executable */
connectn cnaps
loadexe mat_mat.x

/* Download the initialisation data and the two input files,
** then create the output file */
cpdn mat_mat.init.data
cpdn matAFile.input 
cpdn matBFile.input
createfile resCFile.output
/* Bind the file names used inside mat_mat.cn to the actual files */
bindfile INIT_DATA_FILE$ mat_mat.init.data
bindfile matAFile matAFile.input 
bindfile matBFile matBFile.input
bindfile resCFile resCFile.output

/* Run the entry points: INIT$ first, then the program's own entry */
run INIT$
run mat_mat

/* Unbind the output file and upload it */
unbindfile resCFile
cpup resCFile.output

disconnect

2.3  mat_mat.c

A program to run mat_mat.codenet and measure the performance.

/*--------------------------------------------------
** mat_mat.c
**--------------------------------------------------
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Last measurement, kept at file scope as in the original program.
** start_t/end_t hold the raw clock() readings, t_spend the difference
** converted to milliseconds. */
int start_t, end_t;
int t_spend;

/*
** Run the CodeNet script mat_mat.codenet through "cn" and print how much
** clock() time elapsed around the call, in milliseconds.
**
** Returns 0 when system() reports success, 1 otherwise.
**
** NOTE(review): clock() reports processor time of this process; whether
** the time used by the child started through system() is included is
** implementation-dependent.  Confirm on the target host, or switch to a
** wall-clock timer, before trusting the figure.
*/
int main (void)
{
  clock_t t0, t1;
  int status;

  t0 = clock();
  status = system("cn < mat_mat.codenet");
  t1 = clock();

  start_t = (int) t0;
  end_t = (int) t1;
  /* clock() counts in units of 1/CLOCKS_PER_SEC seconds, not milliseconds */
  t_spend = (int) ((double) (t1 - t0) * 1000.0 / CLOCKS_PER_SEC);
  printf("Spend time = %d millisec.\n", t_spend);

  return (status == 0) ? 0 : 1;
}

2.4  mat1.codenet

A script file that only makes the connection and then disconnects.

/*--------------------------------------------------
** mat1.codenet
**--------------------------------------------------
*/
/* Connect and disconnect with nothing in between; t_connect.c runs this
** script to time the connection overhead alone */
connectn cnaps
disconnect

2.5  t_connect.c

A program for measuring the connection and disconnection time.

/*--------------------------------------------------
** t_connect.c
**--------------------------------------------------
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Last measurement, kept at file scope as in the original program.
** start_t/end_t hold the raw clock() readings, t_connect the difference
** converted to milliseconds.  t_spend is not used by this program. */
int start_t, end_t;
int t_spend, t_connect;

/*
** Run mat1.codenet (connect, then disconnect) through "cn" and print the
** clock() time that elapsed around the call, in milliseconds -- the same
** unit mat_mat.c states for its total, so the connection overhead can be
** subtracted from it.
**
** Returns 0 when system() reports success, 1 otherwise.
**
** NOTE(review): clock() reports processor time of this process; whether
** the time used by the child started through system() is included is
** implementation-dependent -- confirm on the target host.
*/
int main (void)
{
  clock_t t0, t1;
  int status;

  t0 = clock();
  status = system("cn < mat1.codenet");
  t1 = clock();

  start_t = (int) t0;
  end_t = (int) t1;
  /* clock() counts in units of 1/CLOCKS_PER_SEC seconds */
  t_connect = (int) ((double) (t1 - t0) * 1000.0 / CLOCKS_PER_SEC);
  printf("t_connect = %d millisec.\n", t_connect);

  return (status == 0) ? 0 : 1;
}

2.6  A sequential program

The performance of a sequential program provides time for the comparison with the parallel version.

/*--------------------------------------------------
** mat_seq.c
**--------------------------------------------------
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define NUM_ROW_COL 4     /* matA's ROW = matB's COL */
#define NUM_K 8           /* matA's COL = matB's ROW */
#define NUM_REPEAT 1000   /* the product is repeated this often for timing */

/* Raw clock() readings around the timed loop, and the average time of one
** matrix product in microseconds. */
long start_t, end_t;
long t_spend;

FILE *inp, *outp;

/*
** Read rows*cols whitespace-separated integers from the text file `path`
** into dst in row-major order.  Prints a message and exits with status 1
** if the file cannot be opened or holds too few numbers.
*/
static void read_matrix(const char *path, int *dst, int rows, int cols)
{
  int r, c;

  if ((inp = fopen(path, "r")) == NULL) {
    printf("Cannot open the file\n");
    exit(1);
  }
  for (r=0; r<rows; r++)
    for (c=0; c<cols; c++) {
      if (fscanf(inp, "%d", &dst[r*cols + c]) != 1) {
        printf("Cannot read the file\n");
        fclose(inp);
        exit(1);
      }
    }
  fclose(inp);
}

/*
** Sequential reference version: read A (matAFile.ascii) and B
** (matBFile.ascii), compute C = A x B NUM_REPEAT times under the clock,
** write C to resCFile.ascii and print the average time of one product.
*/
int main(void)
{
  int matA[NUM_ROW_COL][NUM_K], matB[NUM_K][NUM_ROW_COL];
  int matC[NUM_ROW_COL][NUM_ROW_COL];
  int r, c, k, i, sum;

  read_matrix("matAFile.ascii", &matA[0][0], NUM_ROW_COL, NUM_K);
  read_matrix("matBFile.ascii", &matB[0][0], NUM_K, NUM_ROW_COL);

  start_t = (long) clock();

  for (i=0; i<NUM_REPEAT; i++) {
    for (r=0; r<NUM_ROW_COL; r++)
      for (c=0; c<NUM_ROW_COL; c++) {
        /* start from zero on every repetition; accumulating straight
        ** into matC would leave NUM_REPEAT times the product in it */
        sum = 0;
        for (k=0; k<NUM_K; k++)
          sum += matA[r][k] * matB[k][c];
        matC[r][c] = sum;
      }
  }
  end_t = (long) clock();

  if ((outp = fopen("resCFile.ascii", "w")) == NULL) {
    printf("Cannot open the file\n");
    exit(1);
  }
  for (r=0; r<NUM_ROW_COL; r++)
    for (c=0; c<NUM_ROW_COL; c++)
      fprintf(outp, "%d ", matC[r][c]);
  fclose(outp);

  /* ticks -> microseconds, then averaged over the repetitions */
  t_spend = (long) ((double) (end_t - start_t) * 1000000.0
                    / CLOCKS_PER_SEC / NUM_REPEAT);
  printf("start_t = %ld, end_t = %ld, spend time = %ld microsec.\n",
    start_t, end_t, t_spend);
  return 0;
}

3  A software architecture of two-level parallelism

Distributed (network-based) computing has been popular because computer networks are available almost anywhere and some systems that support the development of distributed computing, for example Parallel Virtual Machine (PVM), are available and powerful. Generally speaking, PVM or similar systems are usually used to implement coarse-grained parallelism, which not only limits the speedup expected from parallel computing but also limits the study of fine-grained parallel algorithms.

In order to further explore the parallelization technique and provide a system which can accomplish both coarse- and fine-grained parallelizations, we have studied the techniques of two-level parallelization. The computers at the first level execute the coarse-grained parallel computations and act as clients to send requests to the computers at the second level when needed. The servers at the second level perform fine-grained parallel computations and return the results back to the clients at the first level. More fine-grained parallelizations may be realized by multi-level parallelism.

For testing its feasibility, we designed and implemented the pvm_cnaps.c program.

/*------------------------------------------------------------
** pvm_cnaps.c
**
** This program is designed to test the implementation of 
** two layers paralleling using pvm and cnaps. 
**  
** 01/21/97 Delong Yang
** 01/25/97 Chongwei Xu
**------------------------------------------------------------
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <pvm3.h>
#include "cnlib.h"
#define  N_SLAVE 10

char *load_file(char *);
void down_file(char *, char *);
void pack_file(char *); 
void unpk_file(char *); 
char *c_name(char *, int, char *);
void run_cnaps(char *, char *, char *, char *, char *, int); 

clock_t clock(void);
/*------------------------------------------------------------
** main: master/slave driver
**
** A task without a PVM parent is the master.  It spawns copies
** of itself (the task count, master included, is capped at
** both N_SLAVE and the number of hosts), multicasts the task
** table and the four CNAPS files with message type 1, then
** waits for one reply of type 10+i from every slave i.
** A spawned task is a slave: it unpacks the files under names
** carrying its own index, runs them through run_cnaps() and
** replies with its index.
**
** Returns 0 on success, -1 on a PVM or set-up failure.
**------------------------------------------------------------
*/
int main(int argc, char *argv[])
{
  enum { FNAME_MAX = 64 };   /* base name + index + extension fit easily */
  /* c_name() must be given writable storage, never a string literal */
  char fx_name[FNAME_MAX]      = "mat_mul_16";
  char finit_name[FNAME_MAX]   = "mat_mul_16";
  char fshort_name[FNAME_MAX]  = "short_16";
  char fvector_name[FNAME_MAX] = "vector_16";
  char fout_name[FNAME_MAX]    = "result";
  int result;
  int i;
  int mytid, num_task, msgtype, me;
  int slave[N_SLAVE];
  int myparent, narch, nhost, nslave;
  struct pvmhostinfo *hostp;
  char *fx, *finit, *fshort, *fvector, *fout;

  (void) argc;

  /* enroll in pvm */
  if ((mytid = pvm_mytid()) < 0) {
    pvm_perror(argv[0]);
    pvm_exit();
    return -1;
  }
  /* my parent? */
  myparent = pvm_parent();
  if ((myparent < 0) && (myparent != PvmNoParent)) {
    pvm_perror(argv[0]);
    pvm_exit();
    return -1;
  }

  printf("Program is running. Please waiting......\n");
  if (myparent == PvmNoParent) {    /* master ? */
    slave[0] = mytid;
    nslave = N_SLAVE;
    if (pvm_config(&nhost, &narch, &hostp) < 0) {
      pvm_perror(argv[0]);
      pvm_exit();
      return -1;
    }
    if (nslave > nhost)
      nslave = nhost;

    /* spawn children; their tids land in slave[1..nslave-1] */
    num_task = pvm_spawn(argv[0], (char **) 0, 0, "", nslave-1, &slave[1]);
    if (num_task < nslave-1) {
      printf("Fail to spawn slaves. Abort process. Error codes are:\n");
      if (num_task < 0) {
        /* the call itself failed, so the tid array was not filled in */
        pvm_perror(argv[0]);
      } else {
        /* error codes occupy the slots after the tasks that did start */
        for (i=num_task+1; i<nslave; i++) {
          printf("TID %d %d \n",i,slave[i]);
        }
        /* started tasks are slave[1..num_task], inclusive */
        for (i=1; i<=num_task; i++) {
          pvm_kill(slave[i]);
        }
      }
      pvm_exit();
      return -1;
    }
    msgtype = 1;
    pvm_initsend(PvmDataDefault);
      pvm_pkint(&nslave,1,1);      /*number of slaves*/
      pvm_pkint(slave,nslave,1);   /*slave's tids*/
      /* send initial and input data to slaves */
      pack_file("mat_mul_16.x");
      pack_file("mat_mul_16.init.data");
      pack_file("short_16.input");
      pack_file("vector_16.input");
    pvm_mcast(slave,nslave,msgtype);

    /*wait for slaves to reply*/
    for (i=1; i<nslave; i++) {
      msgtype = 10 + i;
      pvm_recv(slave[i], msgtype);
      pvm_upkint(&result, 1, 1);
      printf("The slave %d has done.\n",result);
    }
  } else {        /* slave section */
    msgtype = 1;
    pvm_recv(myparent, msgtype);
      pvm_upkint(&nslave, 1, 1);
      /* the table must fit in slave[] before it is unpacked */
      if (nslave < 1 || nslave > N_SLAVE) {
        printf("Bad number of slaves in message: %d\n", nslave);
        pvm_exit();
        return -1;
      }
      pvm_upkint(slave, nslave, 1);

    /*who am I?*/
    me = -1;
    for (i=0; i<nslave; i++)
      if (mytid == slave[i]) {
        me = i;
        break;
      }
    if (me < 0) {
      printf("Task %d is not in the task table.\n", mytid);
      pvm_exit();
      return -1;
    }

    /* build the per-slave file names; always use the returned pointer */
    fx      = c_name(fx_name, me, ".x");
    finit   = c_name(finit_name, me, ".init.data");
    fshort  = c_name(fshort_name, me, ".input");
    fvector = c_name(fvector_name, me, ".input");
    fout    = c_name(fout_name, me, ".output");
    if (!fx || !finit || !fshort || !fvector || !fout) {
      printf("Cannot build the file names.\n");
      pvm_exit();
      return -1;
    }

    /* receive data from master, in the order it was packed */
    unpk_file(fx);
    unpk_file(finit);
    unpk_file(fshort);
    unpk_file(fvector);

    /* run cnaps subroutine */
    run_cnaps(fx, finit, fshort, fvector, fout, 1000);

    /* reply to master */
    msgtype = 10 + me;
    pvm_initsend(PvmDataDefault);
      pvm_pkint(&me, 1, 1);
    pvm_send(myparent, msgtype);
  }
  pvm_exit();
  return 0;
}

/*------------------------------------------------------------
** append a series number to file name
**
** Builds "<name><num><ext>", e.g. ("result", 3, ".output")
** gives "result3.output"; num is written in decimal, so it
** may have more than one digit.
**
** The result is returned in a newly malloc'ed string that the
** caller owns; name and ext are only read, so string literals
** are safe to pass.  Returns NULL if memory runs out.
**------------------------------------------------------------
*/
char *c_name(char *name, int num, char ext[])
{
  /* 3 characters per byte is enough for any int in decimal, +1 for sign */
  size_t room = strlen(name) + (sizeof(int) * 3 + 1) + strlen(ext) + 1;
  char *full = malloc(room);

  if (full == NULL)
    return NULL;
  sprintf(full, "%s%d%s", name, num, ext);   /* cannot exceed room */
  return full;
}

/*------------------------------------------------------------
** pack data from buffer
**
** Loads fname with load_file() and adds it to the current PVM
** send buffer as: one int holding the buffer length, then the
** buffer itself (an int file length followed by the file
** bytes).  unpk_file() reads exactly this layout.
**
** Prints a message and exits if the file cannot be loaded,
** the same way unpk_file() gives up on its own failures.
**------------------------------------------------------------
*/
void pack_file(char *fname)
{
  char *temp_buf;
  int buf_len;

  temp_buf = load_file(fname);
  if (temp_buf == NULL) {
    printf("Cannot load file %s\n", fname);
    exit(-1);
  }
  /* get file length, it is stored at the beginning
     of the buffer (first sizeof(int) bytes). */
  buf_len = *(int *) temp_buf;
  buf_len += sizeof(int); /* buffer length = file length + header */
  pvm_pkint(&buf_len, 1, 1);
  pvm_pkbyte(temp_buf, buf_len, 1);
  free(temp_buf);
}

/*------------------------------------------------------------
** unpack data and save it to a file
**
** Reads one file from the current PVM receive buffer in the
** layout written by pack_file() -- an int buffer length, then
** that many bytes, of which the first sizeof(int) are a length
** header -- and writes the bytes after the header to fname.
**
** Prints a message and exits on a malformed length, on memory
** exhaustion, or when the file cannot be written.
**------------------------------------------------------------
*/
void unpk_file(char *fname)
{
  char *temp_buf;
  int buf_len;
  size_t payload;
  FILE *fp;

  /* the buffer always carries at least its own length header */
  if (pvm_upkint(&buf_len, 1, 1) < 0 || buf_len < (int) sizeof(int)) {
    printf("bad file length in message!");
    exit(-1);
  }
  if ((temp_buf = malloc((size_t) buf_len)) == NULL) {
    printf("memory allocation error!");
    exit(-1);
  }

  pvm_upkbyte(temp_buf, buf_len, 1);
  payload = (size_t) buf_len - sizeof(int);

  /* "wb": the transferred files include binary ones */
  if ((fp = fopen(fname, "wb")) == NULL) {
    printf("Can't open file!");
    free(temp_buf);
    exit(-1);
  }
  if (fwrite(temp_buf + sizeof(int), sizeof(char), payload, fp) != payload) {
    printf("Can't write file!");
    fclose(fp);
    free(temp_buf);
    exit(-1);
  }
  if (fclose(fp) != 0) {
    printf("Can't write file!");
    free(temp_buf);
    exit(-1);
  }
  free(temp_buf);
}

/*------------------------------------------------------------
** open a file and load it to buffer
**
** Returns a malloc'ed buffer laid out as
**     [int file length][file bytes ...]
** which the caller owns and must free().
**
** Returns NULL, after printing a message, if the file cannot
** be opened, measured or read in full, if its length does not
** fit in an int, or if memory runs out.
**------------------------------------------------------------
*/
char *load_file(char *name)
{
  FILE *fpt;
  char *buffer;
  long fsize;
  int nbytes;

  /* "rb": the files loaded here include binary ones */
  if (!(fpt = fopen(name, "rb"))) {
    printf("Can't open file!");
    return NULL;
  }

  /* file length = offset of the end of the file */
  if (fseek(fpt, 0L, SEEK_END) != 0 || (fsize = ftell(fpt)) < 0) {
    printf("Can't read file!");
    fclose(fpt);
    return NULL;
  }
  nbytes = (int) fsize;
  if (nbytes < 0 || (long) nbytes != fsize) {   /* header is an int */
    printf("Can't read file!");
    fclose(fpt);
    return NULL;
  }

  if ((buffer = malloc(sizeof(int) + (size_t) nbytes)) == NULL) {
    printf("memory allocation error!");
    fclose(fpt);
    return NULL;
  }
  *(int *) buffer = nbytes;

  rewind(fpt);
  if (fread(buffer + sizeof(int), sizeof(char), (size_t) nbytes, fpt)
      != (size_t) nbytes) {
    printf("Can't read file!");
    free(buffer);
    fclose(fpt);
    return NULL;
  }
  fclose(fpt);
  return buffer;
}

/*------------------------------------------------------------
** run_cnaps: drive one complete CNAPS job through cnlib
**
**   fx       execution file to load
**   finit    initialisation data, bound to INIT_DATA_FILE$
**   fshort   input file bound to "matfile"
**   fvector  input file bound to "vecfile"
**   out      output file, bound to "resfile" and copied up
**            once the run is over
**   size     size handed to cnCreateFile for the output file
**------------------------------------------------------------
*/
void run_cnaps(char *fx, char *finit, char *fshort, 
               char *fvector, char *out, int size)
{
  /* name used by the CNAPS program / file it is bound to; the first
     three entries are also the files that get downloaded */
  char *bound_as[4];
  char *host_file[4];
  int k;

  bound_as[0] = "INIT_DATA_FILE$";  host_file[0] = finit;
  bound_as[1] = "matfile";          host_file[1] = fshort;
  bound_as[2] = "vecfile";          host_file[2] = fvector;
  bound_as[3] = "resfile";          host_file[3] = out;

  /* start cnlib, connect, load the execution file */
  cnInit(LOG_ALL, "stdout", CONT);
  cnConnectN("cnaps");
  cnLoadExe(fx);

  /* download the three input files */
  for (k = 0; k < 3; k++)
    cnCpDn(host_file[k], NULL);

  /* create the output file */
  cnCreateFile(out, size);

  /* bind inputs and output in table order */
  for (k = 0; k < 4; k++)
    cnBindFile(bound_as[k], host_file[k], SERVER);

  /* run the entry points */
  cnRun("INIT$");
  cnRun("mat_mul");

  /* release the output file and fetch it */
  cnUnbindFile("resfile");
  cnCpUp(out, NULL);

  cnDisconnect();
}


File translated from TEX by TTH, version 1.55.