#!/usr/bin/perl
# Release number of this script; it is interpolated into the banner text.
$version = '1.0.0';
# paracne.perl - a parallel version of aracne
#
# Copyright (C) 2011 Luigi Cerulo - lcerulo@unisannio.it
# http://rcost.unisannio.it/cerulo
# Department of Science - University of Sannio, Benevento, Italy
# Biogem Institute on genetic research "Gaetano Salvatore", Ariano Irpino (AV), Italy
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#
#
# TODO: (any suggestions are welcome!)
# Changelog:
# 26 Jun 2011 (release 1.0.0)
# - first public release
use Getopt::Long qw(:config no_ignore_case);;
# Banner shown in verbose mode and at the top of the usage text
# ("testata" is Italian for "header").
$testata="paracne.perl - a parallel version of aracne v. $version\n";
#$testata.="Copyright (C) 2011 Luigi Cerulo - lcerulo@unisannio.it\n";
#$testata.="University of Sannio, Benevento, Italy\n";
#$testata.="http://paracne.sourceforge.net/\n\n";

# Option defaults; GetOptions overwrites them through the references below.
my $verbose = '';                    # -v   : print progress messages
my $help = '';                       # -h/-?: print the usage text and exit
my $ifile='';                        # -i   : input matrix (mandatory)
my $ofile='./aracne-net.txt';        # -o   : merged output network
my $ncpu=0;                          # -c   : max parallel jobs, 0 = autodetect
my $nsplit=0;                        # -s   : number of probe chunks, 0 = automatic
my $tmpDir='./tmp';                  # -tmp : scratch directory (wiped on start)
# NOTE(review): -a, -k and -t are parsed here, but the job command line built
# later in this file only forwards -p to aracne - confirm whether that is intended.
my $aracneAlgo='fixed_bandwidth';    # -a   (was terminated by a stray comma)
my $aracneKw=-1;                     # -k
my $aracneThr=0;                     # -t
my $aracnePvalue=0.001;              # -p   : p-value handed to aracne
my $aracneBin='./aracne2';           # -abin: path of the aracne executable

# Any unknown or malformed option ends in the usage text.
GetOptions (
    'v'      => \$verbose,
    'h|?'    => \$help,
    'tmp=s'  => \$tmpDir,
    'c=i'    => \$ncpu,
    's=i'    => \$nsplit,
    'i=s'    => \$ifile,
    'o=s'    => \$ofile,
    'abin=s' => \$aracneBin,
    'a=s'    => \$aracneAlgo,
    'k=f'    => \$aracneKw,
    't=f'    => \$aracneThr,
    'p=f'    => \$aracnePvalue
) or usage();

# Serve -h before the verbose banner: usage() prints the banner itself, so the
# old order showed it twice when -v and -h were combined.
usage() if ($help);
print $testata if ($verbose);
# ----------------------------------------------
# ************** cmd line parsing **************
# ----------------------------------------------
# An input matrix is mandatory; usage() prints the help text and exits.
if (!$ifile) {
    print "INPUT ERROR: no input file specified!...\n\n";
    usage();
}
# 0 (the default) asks for autodetection. A negative count is meaningless as
# an upper bound for parallel jobs, so it is treated the same way.
if ($ncpu <= 0) {
    $ncpu=num_cpus();
    # Detection can come back empty; never ask for zero parallel jobs.
    $ncpu=1 if ($ncpu < 1);
    print "detected $ncpu CPUs on this system\n";
} else {
    print "adopting, if available, a maximum of $ncpu CPUs\n";
}
# ----------------------------------------------
# ******************** MAIN ********************
# ----------------------------------------------
# preparing tmp dir
# The scratch directory is always recreated empty. The path comes from the
# command line, so it is never handed to a shell: rm is run in list form and
# the directory is created with the mkdir builtin. Both steps are checked.
if (-e $tmpDir) {
    print "cleaning tmp dir..." if $verbose;
    # "--" keeps a path starting with "-" from being read as an rm option.
    system('rm', '-rf', '--', $tmpDir) == 0
        or die "ERROR: cannot remove tmp dir $tmpDir\n";
} else {
    print "creating tmp dir in $tmpDir..." if ($verbose);
}
mkdir($tmpDir) or die "ERROR: cannot create tmp dir $tmpDir: $!\n";
print "done\n" if ($verbose);
# Main pipeline:
#   1. read the probe identifiers (first tab-separated column, header skipped),
#   2. write them in chunks to $tmpDir/splNNNN,
#   3. write one aracne command per chunk to ./job-list.txt and run the list
#      through xjobs with at most $ncpu jobs in parallel,
#   4. merge the partial networks into $ofile, one row per probe, in input order.
# NOTE(review): only -p is forwarded to aracne; the parsed -a, -k and -t values
# are not part of the job command line - confirm whether that is intended.
if (-e $ifile) {
    # --- 1. probe identifiers -------------------------------------------
    open(my $in_fh, '<', $ifile)
        or die "INPUT ERROR: cannot read $ifile: $!\n";
    scalar(<$in_fh>);                       # discard the header row
    my @probeset;
    while (my $row = <$in_fh>) {
        $row =~ s/[\n\r]//g;
        my ($id) = $row =~ /^([^\t]*)/;     # everything before the first tab
        push @probeset, $id;
    }
    close($in_fh);

    my $nprobes = scalar @probeset;
    if ($nprobes == 0) {
        print "INPUT ERROR: no probes found in $ifile!...\n\n";
        exit 1;
    }

    if ($nsplit <= 0) {
        $nsplit = int($nprobes/300)+1; #$ncpu*2;
        $nsplit = 2 if ($nsplit <= 1);
        print "for $nprobes probes and $ncpu CPUs, optimal number of splits set to $nsplit\n" if ($verbose);
    }

    # Round up: the chunk size is never 0 (which made "split -l 0" fail when
    # there were fewer probes than splits) and at most $nsplit chunks result.
    my $nprobf = int(($nprobes + $nsplit - 1) / $nsplit);
    $nprobf = 1 if ($nprobf < 1);

    # --- 2. chunk files ---------------------------------------------------
    print "splitting probes into $nsplit parts " if ($verbose);
    print "each with about $nprobf probes..." if ($verbose);
    my @lsspl;
    for (my $first = 0; $first < $nprobes; $first += $nprobf) {
        my $last = $first + $nprobf - 1;
        $last = $#probeset if ($last > $#probeset);
        my $chunk_file = sprintf("%s/spl%04d", $tmpDir, scalar @lsspl);
        open(my $spl_fh, '>', $chunk_file)
            or die "ERROR: cannot write $chunk_file: $!\n";
        print $spl_fh "$_\n" for @probeset[$first..$last];
        close($spl_fh) or die "ERROR: cannot write $chunk_file: $!\n";
        push @lsspl, $chunk_file;
    }

    # --- 3. job list and parallel run ------------------------------------
    open(my $job_fh, '>', './job-list.txt')
        or die "ERROR: cannot write ./job-list.txt: $!\n";
    my $totjobs = 0;
    for my $i (0..$#lsspl) {
        print $job_fh "$aracneBin -p $aracnePvalue -i $ifile -s $lsspl[$i] -o $tmpDir/aracne-subnet-$i.txt\n";
        $totjobs++;
    }
    close($job_fh) or die "ERROR: cannot write ./job-list.txt: $!\n";
    print "done\n" if ($verbose);

    print "launching a total of $totjobs jobs with maximum $ncpu jobs in parallel...\n" if ($verbose);
    # Backticks keep the jobs' standard output off the terminal, as before.
    # $ncpu is an integer (Getopt "=i"), so it is safe inside the command.
    my $xjobs_output = `xjobs -j $ncpu < ./job-list.txt`;
    if ($? != 0) {
        warn "WARNING: xjobs did not finish cleanly (status $?); results may be incomplete\n";
    }

    # --- 4. merge partial networks -----------------------------------------
    print "assembling results..." if ($verbose);
    open(my $out_fh, '>', $ofile)
        or die "OUTPUT ERROR: cannot write $ofile: $!\n";
    my %dataSet;    # $dataSet{probe1}{probe2} = MI value
    for my $i (0..$#lsspl) {
        my $subnet_file = "$tmpDir/aracne-subnet-$i.txt";
        my $sub_fh;
        if (!open($sub_fh, '<', $subnet_file)) {
            warn "WARNING: cannot read partial result $subnet_file: $!\n";
            next;
        }
        while (my $line = <$sub_fh>) {
            $line =~ s/[\n\r]//g;
            if ($line =~ /^\>/) {
                # Header lines are copied from the first partial result only,
                # rewritten to describe the merged run instead of one chunk.
                next if ($i != 0);
                if ($line =~ /^\> Input file/) {
                    $line = "> Input file $ifile";
                }
                elsif ($line =~ /^\> Output file/) {
                    $line = "> Output file $ofile";
                }
                elsif ($line =~ /^\> Subnetwork file/) {
                    $line = "> Subnetwork file";
                }
                print $out_fh "$line\n";
                next;
            }
            # Data row: probe1, then (probe2, MI) pairs, all tab separated.
            my @parts = split(/\t/, $line);
            next if (!@parts);
            my $probe1 = $parts[0];
            for (my $j = 1; $j < $#parts; $j += 2) {
                $dataSet{$probe1}{$parts[$j]} = $parts[$j+1];
            }
        }
        close($sub_fh);
    }

    foreach my $p1 (@probeset) {
        print $out_fh "$p1";
        # Look the row up once; probes without edges skip the inner scan and
        # no empty rows are autovivified in %dataSet.
        my $row = $dataSet{$p1};
        if ($row) {
            foreach my $p2 (@probeset) {
                next if ($p1 eq $p2);
                my $mi = $row->{$p2};
                next unless (defined $mi && $mi ne '');
                print $out_fh "\t$p2\t$mi";
            }
        }
        print $out_fh "\n";
    }
    close($out_fh) or die "OUTPUT ERROR: cannot write $ofile: $!\n";
    print "done\n" if ($verbose);
} else {
    print "INPUT ERROR: no input file $ifile found!...\n\n";
    exit 1;    # report the failure to the caller instead of exiting with 0
}
exit;
# ----------------------------------------------
# ****************** functions *****************
# ----------------------------------------------
# Print the banner followed by the command line help, then end the program.
# Takes no arguments and never returns (exits with the default status).
# Declared without a prototype because it is called before this definition.
sub usage {
    print "$testata";
    print "\n";
    print " usage: $0 [-hv] [-c ncpu] [-s nsplit] [-tmp dir] [-abin path] -i input [-o output]\n";
    print "\n";
    print " -h : This (help) message\n";
    print " -v : Verbose output\n";
    print " -i : The input data matrix (required)\n";
    print " -o : The output network file, default: ./aracne-net.txt\n";
    print " -tmp : The working directory for temporary files (erased at start), default: ./tmp\n";
    print " -abin : The path of the aracne executable, default: ./aracne2\n";
    print " -c : The max number of cpus to be used, 0 (default) indicates all available cpus\n";
    print " -s : The number of splits of the data matrix, 0 (default) is determined automatically\n";
    print " from the number of probes (about 300 probes per split)\n";
    print "\n";
    print " -a : [ARACNE parameter] Algorithm adopted fixed_bandwidth | variable_bandwidth | adaptive_partitioning\n";
    print " default: fixed_bandwidth\n";
    print " -k : [ARACNE parameter] Kernel width (accurate method only), default: determined by program\n";
    print " -t : [ARACNE parameter] MI threshold, default: 0\n";
    # The option table initialises the p-value to 0.001, not 1.
    print " -p : [ARACNE parameter] P-value for MI threshold (e.g. 1e-7), default: 0.001\n";
    print "\n\n";
    exit;
}
# Return the number of processors available on this machine.
# Takes no arguments. Always returns an integer >= 1: when nothing can be
# detected the answer is 1, never 0, so callers can use it as a job count.
sub num_cpus {
    my $count = 0;

    # Linux: count the "processor" entries of /proc/cpuinfo directly rather
    # than spawning uname and grep.
    if ($^O eq 'linux' && open(my $cpuinfo, '<', '/proc/cpuinfo')) {
        $count = grep { /^processor/ } <$cpuinfo>;
        close($cpuinfo);
    }

    # Elsewhere, or if /proc gave nothing, ask getconf. A missing getconf
    # just yields empty output, which leaves the count at 0.
    if ($count < 1) {
        my $reported = `getconf _NPROCESSORS_ONLN 2>/dev/null`;
        if (defined $reported && $reported =~ /^\s*(\d+)/) {
            $count = $1 + 0;
        }
    }

    return($count >= 1 ? $count : 1);
}
# Tell whether a Perl module can be loaded.
#   $module - package name such as "File::Spec" (a "File/Spec.pm" style
#             path is accepted as well)
# Returns 1 when the module loads and 0 otherwise; a failed load is trapped
# and never propagates to the caller.
sub is_installed {
    my ($module) = @_;
    return 0 unless (defined $module && length $module);

    (my $filename = $module) =~ s{::}{/}g;
    # require with a string expression needs the real file name: without the
    # ".pm" suffix the lookup failed for every module.
    $filename .= '.pm' unless ($filename =~ /\.pm\z/);

    # The trailing 1 makes success independent of the module's return value.
    my $loaded = eval { require $filename; 1 };
    return $loaded ? 1 : 0;
}