## AVT - Automatic Video Region Segmentation and Tracking System


#### Terms of Use

Copyright (c) 2001 by DVMM Laboratory

Department of Electrical Engineering
Columbia University
Rm 1312 S.W. Mudd, 500 West 120th Street
New York, NY 10027
USA

Di Zhong and Shih-Fu Chang, Columbia University {dzhong,sfchang}@ee.columbia.edu

#### 1. System Overview

AVT is an automatic region segmentation and tracking system for general video sources. It uses feature fusion and motion projection to track salient video regions over long video sequences.

#### 2. Installation

The system has been developed and tested on the Windows platform. Two packages are available: AVT_Binary.zip contains only the executable binary, while AVT_Package.zip is the full package, including the executable binary and several other files that provide "click-and-go" demos showing how to use the tool. Just unzip the package into a folder of your choice; no additional setup is needed.

Here is a list of the files:

  • AVT.exe: the executable binary of AVT
  • mpeg2dec.exe: the MPEG-2 (also MPEG-1 compatible) video decoder, needed if the input is MPEG video
  • README.TXT: this document
  • mpeg.cut: the scene cut file used in the mpeggo.bat demo
  • raw.cut: the scene cut file used in the rawgo.bat demo
  • default.par: the default parameter file (optional for the AVT tool)
  • MPEGGO.bat: the demo script showing how to process an MPEG video input
  • rawgo.bat: the demo script showing how to process a PPM image sequence input

#### 3. Usage

Usage: avt {scene.cut} [track.par]

Note:

  • The scene cut file is required; the tracking parameter file is optional (an example invocation is shown below).
  • mpeg.cut and raw.cut are two example scene cut files.
  • default.par contains the default tracking parameters.
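
For example, using the files shipped in the full package (the paths written inside the example scene cut files may need to be edited to match your setup):

```
avt mpeg.cut default.par
avt raw.cut
```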

#### 4. Scene cut file / Input

The system can take either MPEG (MPEG-1 or MPEG-2) video or raw image frames as input.

NOTE: for raw frames, the current version only accepts UNIX-style PPM files. The difference between UNIX and Windows files lies in how line breaks are encoded: on UNIX a line break is the single ASCII character 0x0A, while on Windows it is the two characters 0x0D 0x0A. Please make sure the files you provide are in the correct format.
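
If your frames were produced on Windows, their headers may contain 0x0D bytes. Below is a minimal sketch (not part of AVT; the tool name and structure are illustrative) that checks a binary PPM header for Windows-style line endings, so you know whether a file needs conversion before feeding it to AVT. It does not handle PPM comment lines.

```cpp
// ppmcheck.cpp -- reports whether a binary PPM header uses CR/LF line endings.
#include <cstdio>

int main(int argc, char *argv[]) {
    if (argc < 2) { std::fprintf(stderr, "usage: ppmcheck file.ppm\n"); return 1; }
    std::FILE *fp = std::fopen(argv[1], "rb");
    if (!fp) { std::fprintf(stderr, "cannot open %s\n", argv[1]); return 1; }

    // The P6 header is plain text: "P6", width, height and maxval separated by
    // whitespace. Scan only the header for 0x0D bytes (Windows line endings).
    bool has_cr = false;
    int fields = 0;   // whitespace-separated header fields consumed so far
    int c;
    while (fields < 4 && (c = std::fgetc(fp)) != EOF) {
        if (c == 0x0D) has_cr = true;
        if (c == ' ' || c == '\t' || c == '\n' || c == 0x0D) continue;
        // consume one non-whitespace field, then check its terminator
        while (c != EOF && c != ' ' && c != '\t' && c != '\n' && c != 0x0D)
            c = std::fgetc(fp);
        if (c == 0x0D) has_cr = true;
        ++fields;
    }
    std::fclose(fp);

    std::printf("%s: %s line endings in header\n", argv[1],
                has_cr ? "Windows (CR/LF)" : "UNIX (LF)");
    return has_cr ? 2 : 0;
}
```

Two annotated example scene cut files follow; the text after each arrow is an explanatory comment, not part of the file.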

----mpeg.cut------
MPEG -> Magic
/proj/oscar1/VQ/AVT/TMP -> Output directory
/proj/oscar1/VQ/AVT/akiyo.mpg -> MPEG file name
2 -> Number of shots to track
10 15 -> first shot, start & end frame
30 34 -> second shot, start & end frame

----raw.cut------
RAW -> Magic
2 -> Number of shots to track
/proj/oscar1/VQ/AVT/TMP/akiyo_10/f%.5d.ppm 10 15 -> first shot directory, start & end frame
/proj/oscar1/VQ/AVT/TMP/akiyo_30/f%.5d.ppm 30 34 -> second shot directory, start & end frame
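
As an illustration of the layout, here is a minimal sketch (not part of AVT) that reads a scene cut file in the MPEG layout shown above; the field order and whitespace handling are assumptions based only on the annotated example.

```cpp
// cutread.cpp -- reads an MPEG-style scene cut file and lists its shots.
#include <fstream>
#include <iostream>
#include <string>
using namespace std;

int main(int argc, char *argv[]) {
    if (argc < 2) { cerr << "usage: cutread mpeg.cut" << endl; return 1; }
    ifstream in(argv[1]);
    if (!in) { cerr << "cannot open " << argv[1] << endl; return 1; }

    string magic, outdir, mpegfile;
    int nshots = 0;
    in >> magic;      // "MPEG"
    in >> outdir;     // output directory
    in >> mpegfile;   // MPEG file name
    in >> nshots;     // number of shots to track

    cout << "video: " << mpegfile << ", output: " << outdir << endl;
    for (int i = 0; i < nshots && in; i++) {
        int start, end;
        in >> start >> end;   // start & end frame of each shot
        cout << "shot " << i + 1 << ": frames " << start << "-" << end << endl;
    }
    return 0;
}
```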

#### 5. Output

For each frame, avt generates the following output files:

  • .seg file: segmented regions drawn in PPM format with mean colors
  • .oif file: segmented regions with their basic features (see the next section for details)

#### 6. OIF file - Data structure and I/O functions

//
//ObjectFile is a list of ObjectInfo
class ObjectFile : public DArray<ObjectInfo> {
public:

  int available_id ; //used in the tracking process to ensure unique object IDs
  int frame_w, frame_h ;
  //
  //Note: the following features are not computed in this version
  float affine[6] ; //global motion parameters
  float mv[2] ; //average motion vector
  char pan ; //-1: no panning; 0-7: panning direction
  char zoom ; //-1: no zooming; 0: zoom in; 1: zoom out ;

  //
  //Constructor
  ObjectFile(char *fname) {
    ifstream in(fname, ios::binary) ;

    if(!in) {
      cerr<<"Error: can not open OIF "<<fname<<endl ;
      available_id=0 ;
      return ;
    }

    //
    // Read in header information
    int n ;
    in.read((char *)&n, sizeof(int)) ;
    in.read((char *)&available_id, sizeof(int)) ;
    in.read((char *)&frame_w, sizeof(int)) ;
    in.read((char *)&frame_h, sizeof(int)) ;
    in.read((char *)affine, sizeof(float)*6) ;
    in.read((char *)mv, sizeof(float)*2) ;
    in.read(&pan, 1) ;
    in.read(&zoom, 1) ;

    //
    // Read in each object
    for(int i=0; i<n; i++){
      ObjectInfo *obj=new ObjectInfo(in) ;
      add(obj) ;
    }

    in.close() ;
  };

  // Binary format output
	void save(char *fname) {
		ofstream out(fname, ios::binary) ;
		if(!out) {
			cerr<<"Error: cannot create "<<fname<<endl ;
			return ;
		}

		int cnum=number() ;
		out.write((char *)&cnum, sizeof(int)) ;
		out.write((char *)&available_id, sizeof(int)) ;
		out.write((char *)&frame_w, sizeof(int)) ;
		out.write((char *)&frame_h, sizeof(int)) ;
		out.write((char *)affine, 6*sizeof(float)) ;
		out.write((char *)mv, 2*sizeof(float)) ;
		out.write(&pan, 1) ;
		out.write(&zoom, 1) ;
		for(int i=0; i<number(); i++)
			(*this)[i].save(out) ;

		out.close() ;
  }
} ;


//
//ObjectInfo contains basic features of a segmented region
class ObjectInfo {
public:
  int id ;  //unique ID for this object

  int pixel_num ;
  int ul[2], lr[2] ; // Upper_left and lower_right
  double luv[3] ;  //representative color in L*u*v* space
  int cs ;  //0 for Luv space (always 0 in this version)
  float mv[2] ; //average motion vector
  float affine[6] ; //affine model parameters
  char  bg ; //1: background; 0: foreground (**NOT** valid in this version)

  unsigned char *mask ; // size=(y1-y0+1)*(x1-x0+1), left_right/top_down
  DArray<int> connected_obj_ids ;

	ObjectInfo(istream& in) {
		int cnum ;
		char tmp ;

		in.read((char *)&id, sizeof(int)) ;
		in.read((char *)&cnum, sizeof(int)) ;
		in.read((char *)&pixel_num, sizeof(int)) ;
		in.read((char *)ul, 2*sizeof(int)) ;
		in.read((char *)lr, 2*sizeof(int)) ;
		in.read(&tmp, 1) ;
		cs=(int)tmp ;
		in.read((char *)luv, 3*sizeof(double)) ;
		in.read((char *)mv, 2*sizeof(float)) ;
		in.read((char *)affine, 6*sizeof(float)) ;
		in.read(&bg, 1) ;

		mask=NULL ;
		if(pixel_num) {
			int s=width()*height() ;
			if(s) {
				mask=new unsigned char[s] ;
				in.read((char *)mask, s) ;
			}
		}

		for(int i=0; i<cnum; i++){
			int cid ;
			in.read((char *)&cid, sizeof(int)) ;
			connected_obj_ids.add(cid) ;
		}
	}

	void save(ostream& o) {
		int cnum=connected_obj_ids.number() ;

		o.write((char *)&id, sizeof(int)) ;
		o.write((char *)&cnum, sizeof(int)) ;
		o.write((char *)&pixel_num, sizeof(int)) ;
		o.write((char *)ul, 2*sizeof(int)) ;
		o.write((char *)lr, 2*sizeof(int)) ;
		char tmp=(char)cs ;
		o.write(&tmp, 1) ;
		o.write((char *)luv, 3*sizeof(double)) ;
		o.write((char *)mv, 2*sizeof(float)) ;
		o.write((char *)affine, 6*sizeof(float)) ;
		o.write(&bg, 1) ;

		int s=width()*height() ;
		if(pixel_num==0) s=0 ;
		if(s) o.write((char *)mask, s) ;

		for(int i=0; i<cnum; i++){
      s=connected_obj_ids[i] ;
			o.write((char *)&s, sizeof(int)) ;
		}
	}
} ;
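
As a usage sketch (assuming the declarations above are available in a header and that DArray provides the number() and operator[] members used above; the file name "ObjectFile.h" is hypothetical), an .oif file could be inspected like this:

```cpp
// oifdump.cpp -- load one .oif file and print each region's ID, size and
// bounding box. A minimal sketch, not part of AVT.
#include <iostream>
#include "ObjectFile.h"   // hypothetical header holding the classes above
using namespace std;

int main(int argc, char *argv[]) {
    if (argc < 2) { cerr << "usage: oifdump frame.oif" << endl; return 1; }

    ObjectFile objects(argv[1]);   // reads the header and every region
    cout << "frame " << objects.frame_w << "x" << objects.frame_h
         << ", " << objects.number() << " regions" << endl;

    for (int i = 0; i < objects.number(); i++) {
        ObjectInfo &r = objects[i];
        cout << "region " << r.id << ": " << r.pixel_num << " pixels, bbox ("
             << r.ul[0] << "," << r.ul[1] << ")-("
             << r.lr[0] << "," << r.lr[1] << ")" << endl;
    }
    return 0;
}
```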
