#!/usr/bin/perl
# ldiff.pl
# NOTE(review): the banner printed below names this tool "paracne.perl";
# the file name above looks like a leftover - confirm.
#
# Copyright (C) 2011 Luigi Cerulo - lcerulo@unisannio.it
# http://rcost.unisannio.it/cerulo
# Department of Science - University of Sannio, Benevento, Italy
# Biogem Institute on genetic research "Gaetano Salvatore", Ariano Irpino (AV), Italy
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#
#
# TODO: (any suggestions are welcome!)
# Changelog:
#   26 Jun 2011 (release 1.0.0)
#     - first public release
#
# Overview:
#   1. read the probe ids (first column, header row skipped) of the input file
#   2. write them in chunks to <tmp>/splaa, <tmp>/splab, ...
#   3. write one ARACNE command per chunk to ./job-list.txt and run the
#      list through xjobs with at most <ncpu> jobs in parallel
#   4. merge the per-chunk sub-networks into a single adjacency file

use strict;
use warnings;

use File::Path qw(make_path remove_tree);
use Getopt::Long qw(:config no_ignore_case);

my $version = "1.0.0";

my $testata = "paracne.perl - a parallel version of aracne v. $version\n";
#$testata.="Copyright (C) 2011 Luigi Cerulo - lcerulo@unisannio.it\n";
#$testata.="University of Sannio, Benevento, Italy\n";
#$testata.="http://paracne.sourceforge.net/\n\n";

# Default handed to ARACNE's -p option; also quoted by the help text so the
# two cannot drift apart.
my $default_pvalue = 0.001;

# File holding one ARACNE command line per chunk; consumed by xjobs.
my $job_list = './job-list.txt';

my $verbose      = '';
my $help         = '';
my $ifile        = '';
my $ofile        = './aracne-net.txt';
my $ncpu         = 0;
my $nsplit       = 0;
my $tmpDir       = './tmp';
my $aracneAlgo   = 'fixed_bandwidth';
my $aracneKw     = -1;
my $aracneThr    = 0;
my $aracnePvalue = $default_pvalue;
my $aracneBin    = './aracne2';

# NOTE(review): -a, -k and -t are accepted here but are never forwarded to
# the ARACNE command line built below (only -p, -i, -s and -o are). Confirm
# the flags expected by the ARACNE binary before wiring them in.
GetOptions(
    'v'      => \$verbose,
    'h|?'    => \$help,
    'tmp=s'  => \$tmpDir,
    'c=i'    => \$ncpu,
    's=i'    => \$nsplit,
    'i=s'    => \$ifile,
    'o=s'    => \$ofile,
    'abin=s' => \$aracneBin,
    'a=s'    => \$aracneAlgo,
    'k=f'    => \$aracneKw,
    't=f'    => \$aracneThr,
    'p=f'    => \$aracnePvalue,
) or usage(1);

print $testata if $verbose;
usage(0) if $help;

# ----------------------------------------------
# ************** cmd line parsing **************
# ----------------------------------------------

if (!$ifile) {
    print "INPUT ERROR: no input file specified!...\n\n";
    usage(1);
}

# Check the input before touching the tmp dir, so a mistyped file name does
# not wipe the results of a previous run.
if (!-e $ifile) {
    print "INPUT ERROR: no input file $ifile found!...\n\n";
    exit 1;
}

if ($tmpDir eq '') {
    print "INPUT ERROR: empty tmp dir name!...\n\n";
    usage(1);
}

if ($ncpu <= 0) {
    $ncpu = num_cpus();
    print "detected $ncpu CPUs on this system\n";
}
else {
    print "adopting, if available, a maximum of $ncpu CPUs\n";
}

# ----------------------------------------------
# ******************** MAIN ********************
# ----------------------------------------------

# preparing tmp dir
if (-e $tmpDir) {
    print "cleaning tmp dir..." if $verbose;
    remove_dir($tmpDir);
    create_dir($tmpDir);
    print "done\n" if $verbose;
}
else {
    print "creating tmp dir in $tmpDir..." if $verbose;
    create_dir($tmpDir);
    print "done\n" if $verbose;
}

my @probeset = read_probe_ids($ifile);
my $nprobes  = scalar @probeset;

if ($nprobes == 0) {
    print "INPUT ERROR: no probes found in $ifile!...\n\n";
    exit 1;
}

if ($nsplit <= 0) {
    # About 300 probes per split, but never fewer than two splits.
    $nsplit = int($nprobes / 300) + 1;
    $nsplit = 2 if $nsplit <= 1;
    print "for $nprobes probes and $ncpu CPUs, optimal number of splits set to $nsplit\n" if $verbose;
}

# Integer division can give 0 when there are fewer probes than splits;
# a chunk must hold at least one probe.
my $nprobf = int($nprobes / $nsplit);
$nprobf = 1 if $nprobf < 1;

print "splitting probes into $nsplit parts " if $verbose;
print "each with about $nprobf probes..."    if $verbose;

my @chunk_files = write_probe_chunks($tmpDir, $nprobf, @probeset);

open my $jobs_fh, '>', $job_list
    or die "cannot write $job_list: $!\n";
for my $i (0 .. $#chunk_files) {
    # NOTE: this line format is unchanged; paths containing whitespace are
    # not quoted for xjobs.
    print {$jobs_fh} "$aracneBin -p $aracnePvalue -i $ifile -s $chunk_files[$i] -o $tmpDir/aracne-subnet-$i.txt\n";
}
close $jobs_fh
    or die "cannot write $job_list: $!\n";
my $totjobs = scalar @chunk_files;

print "done\n" if $verbose;
print "launching a total of $totjobs jobs with maximum $ncpu jobs in parallel...\n" if $verbose;

# $ncpu is an integer (validated by Getopt::Long) and $job_list is a
# constant, so nothing user-controlled reaches the shell here. The captured
# output is discarded, as before.
my $xjobs_output = `xjobs -j $ncpu < $job_list`;
if ($? != 0) {
    warn "WARNING: xjobs did not finish cleanly (status $?); results may be incomplete\n";
}

print "assembling results..." if $verbose;

open my $net_fh, '>', $ofile
    or die "cannot write output file $ofile: $!\n";

# $dataSet{probe1}{probe2} = mutual information reported by ARACNE
my %dataSet;

for my $i (0 .. $#chunk_files) {
    my $subnet = "$tmpDir/aracne-subnet-$i.txt";
    my $subnet_fh;
    if (!open $subnet_fh, '<', $subnet) {
        warn "WARNING: cannot read $subnet: $!\n";
        next;
    }

    while (my $line = <$subnet_fh>) {
        $line =~ s/[\n\r]//g;

        if ($line =~ /^\>/) {
            # Header lines are copied from the first sub-network only, with
            # the per-job file names replaced by the global ones.
            next if $i != 0;
            if ($line =~ /^\> Input file/) {
                $line = "> Input file $ifile";
            }
            elsif ($line =~ /^\> Output file/) {
                $line = "> Output file $ofile";
            }
            elsif ($line =~ /^\> Subnetwork file/) {
                $line = "> Subnetwork file";
            }
            print {$net_fh} "$line\n";
            next;
        }

        # Data line: probe1 <TAB> probe2 <TAB> mi [<TAB> probe3 <TAB> mi ...]
        my ($probe1, @pairs) = split /\t/, $line;
        while (@pairs >= 2) {
            my ($probe2, $mi) = splice @pairs, 0, 2;
            $dataSet{$probe1}{$probe2} = $mi;
        }
    }
    close $subnet_fh;
}

# One output row per probe, neighbours listed in input-file order.
for my $p1 (@probeset) {
    print {$net_fh} $p1;
    my $row = $dataSet{$p1};
    if ($row) {
        for my $p2 (@probeset) {
            next if $p1 eq $p2;
            my $mi = $row->{$p2};
            next if !defined $mi || $mi eq '';
            print {$net_fh} "\t$p2\t$mi";
        }
    }
    print {$net_fh} "\n";
}
close $net_fh
    or die "cannot write output file $ofile: $!\n";

print "done\n" if $verbose;

exit;

# ----------------------------------------------
# ****************** functions *****************
# ----------------------------------------------

# Print the help text and terminate the program.
#   $exit_code - process exit status (optional, default 0)
sub usage {
    my ($exit_code) = @_;
    $exit_code = 0 if !defined $exit_code;

    print "$testata";
    print "\n";
    print " usage: $0 [-hv] [-c ncpu] [-s nsplit] [-i input] [-o output]\n";
    print "\n";
    print " -h : This (help) message\n";
    print " -v : Verbose output\n";
    print " -i : The input file (required)\n";
    print " -o : The output file, default: ./aracne-net.txt\n";
    print " -tmp : The directory for temporary files (wiped at start), default: ./tmp\n";
    print " -abin : The ARACNE executable, default: ./aracne2\n";
    print " -c : The max number of cpus to be used, 0 (default) indicates all available cpus\n";
    print " -s : The number of splits of the data matrix, 0 (default) is determined automatically\n";
    print " on the basis of the number of probes (about 300 probes per split)\n";
    print "\n";
    print " -a : [ARACNE parameter] Algorithm adopted fixed_bandwidth | variable_bandwidth | adaptive_partitioning\n";
    print " default: fixed_bandwidth\n";
    print " -k : [ARACNE parameter] Kernel width (accurate method only), default: determined by program\n";
    print " -t : [ARACNE parameter] MI threshold, default: 0\n";
    print " -p : [ARACNE parameter] P-value for MI threshold (e.g. 1e-7), default: $default_pvalue\n";
    print "\n\n";
    exit $exit_code;
}

# Count the "processor" entries of /proc/cpuinfo.
# Returns 1 when the file is unavailable or lists no processor, so the
# result can always be used as a job count.
sub num_cpus {
    my $count = 0;
    if (open my $cpuinfo, '<', '/proc/cpuinfo') {
        while (my $line = <$cpuinfo>) {
            $count++ if $line =~ /^processor/;
        }
        close $cpuinfo;
    }
    return $count > 0 ? $count : 1;
}

# Tell whether a module can be loaded.
#   $module - module name, e.g. "File::Spec"
# Returns 1 when the module was loaded successfully, 0 otherwise.
sub is_installed {
    my ($module) = @_;
    (my $filename = $module) =~ s{::}{/}g;
    # require EXPR wants a file name, so the .pm extension is mandatory.
    $filename .= '.pm' if $filename !~ /\.pm\z/;
    return eval { require $filename; 1 } ? 1 : 0;
}

# Return the first tab-separated field of every line after the header row.
#   $path - expression matrix file
sub read_probe_ids {
    my ($path) = @_;
    open my $in, '<', $path
        or die "cannot read input file $path: $!\n";

    my $header = <$in>;    # column titles, not a probe
    my @ids;
    while (my $row = <$in>) {
        $row =~ s/[\n\r]//g;
        my ($id) = split /\t/, $row, 2;
        push @ids, defined $id ? $id : '';
    }
    close $in;
    return @ids;
}

# Write the probe ids, $chunk_size per file, to $dir/splaa, $dir/splab, ...
# Returns the list of files written, in order.
sub write_probe_chunks {
    my ($dir, $chunk_size, @probes) = @_;

    my @files;
    my $suffix = 'aa';
    for (my $start = 0; $start < @probes; $start += $chunk_size) {
        my $end = $start + $chunk_size - 1;
        $end = $#probes if $end > $#probes;

        my $file = "$dir/spl$suffix";
        open my $out, '>', $file
            or die "cannot write $file: $!\n";
        print {$out} map { "$_\n" } @probes[$start .. $end];
        close $out
            or die "cannot write $file: $!\n";

        push @files, $file;
        $suffix++;    # string increment: aa, ab, ..., az, ba, ...
    }
    return @files;
}

# Create a directory (and missing parents); die with the reason on failure.
sub create_dir {
    my ($dir) = @_;
    make_path($dir, { error => \my $errors });
    die "cannot create $dir: " . path_errors($errors) . "\n" if @{$errors};
    return;
}

# Remove a file or directory tree; die with the reason on failure.
sub remove_dir {
    my ($dir) = @_;
    remove_tree($dir, { error => \my $errors });
    die "cannot remove $dir: " . path_errors($errors) . "\n" if @{$errors};
    return;
}

# Flatten the error list produced by File::Path into one message.
sub path_errors {
    my ($errors) = @_;
    return join '; ', map {
        my ($file, $message) = %{$_};
        $file eq '' ? $message : "$file: $message";
    } @{$errors};
}