#!/usr/bin/perl # Name: contig_target.pl # Author: Pi-hsia Su # Date: Oct. 23, 2002 use strict; use Getopt::Long; my $seq_file=""; my $assembly_all=""; my $trimmed_all=""; if ($#ARGV < 0) { &usage(); } GetOptions ("x=s" => \$seq_file, "a=s" => \$assembly_all); if (!$seq_file || !$assembly_all) { &usage(); } my $homedir = `pwd`; chomp $homedir; my $dirname = "$seq_file.load"; my $assem_dir = "$seq_file"; my @ID=(); my @target=(); my @exclude=(); my @contig_len = (); &Check_dir(); `mkdir "$seq_file.load"`; &Copy_all_files(); &ID_info(); &Transfer_seq(); chdir "$dirname"; opendir DIR, "."; my @assembly = readdir(DIR); my $temp1 = shift(@assembly); $temp1 = shift(@assembly); my $assem=""; my $seq_id=""; my $len=0; my $seq=""; &Process(); print "\n"; print "Finished primer design.\n"; &Clean_all(); &Clean_others(); closedir DIR; chdir "$homedir"; opendir HOME, "."; # `rmdir "$seq_file.load"`; closedir HOME; sub usage { print < -a contains 8 columns(tab delimited): ID,L,M,R,range(=R-L),Lx,Rx,length, assembly tab-delimited file (from phpMyAdmin), END exit; } sub Copy_all_files { `cp $homedir/$assembly_all $homedir/$dirname/$assembly_all`; } sub Check_dir { if ((-e "$seq_file.out") == 1) { print "\nPlease rename the existing $seq_file.out using mv.\n\n"; exit; } if ((-e "$seq_file.load") == 1) { print "\nPlease rename the existing $seq_file.load using mv.\n\n"; exit; } } sub ID_info { my $x = 0; my @input_id = (); my @target_L = (); my @target_M = (); my @target_R = (); my @target_len = (); my @target_range = (); my @target_set = (); my @target_Excl_L = (); my @target_Excl_R = (); my @Excl_L = (); my @Excl_R = (); my @Excl_R_range = (); my @exclude_set = (); open (IN, $seq_file) || die ("Can't open file: $seq_file $! \n"); while () { if (($_ =~ /\w/) && ($_ =~ /C/)) { chomp; $x++; ($input_id[$x], $target_L[$x], $target_M[$x], $target_R[$x], $target_len[$x], $target_Excl_L[$x], $target_Excl_R[$x], $contig_len[$x]) = split (/\t/, $_); s/ //g for $input_id[$x]; s/ //g for $target_L[$x]; s/ //g for $target_M[$x]; s/ //g for $target_R[$x]; s/ //g for $target_len[$x]; s/ //g for $target_Excl_L[$x]; s/ //g for $target_Excl_R[$x]; s/ //g for $contig_len[$x]; print "\nInput record $x: \t$input_id[$x]\n"; push @ID, $input_id[$x]; $target_range[$x]=$target_R[$x]-$target_L[$x]; $target_set[$x] = "$target_L[$x]".","."$target_range[$x]"." "; push @target, $target_set[$x]; # print "Target $x: $target_set[$x]\n"; $Excl_L[$x] = "1,"."$target_Excl_L[$x]"." "; $Excl_R_range[$x] = $contig_len[$x]-$target_Excl_R[$x]; $Excl_R[$x] = "$target_Excl_R[$x]".","."$Excl_R_range[$x]"." "; $exclude_set[$x] = "$Excl_L[$x]"." "."$Excl_R[$x]"; push @exclude, $exclude_set[$x]; # print "Exclude $x: $exclude_set[$x]\n"; } } close IN; } sub Transfer_seq { my $name=""; my $count=0; print "\n"; chdir "$dirname"; opendir DIR, "."; foreach $name (@ID) { $count++; open (OUT, ">$name.assem"); open (DBASSEM, $assembly_all); while () { chomp; my ($id, $len, $seq) = split (/\t/, $_); print OUT $_ if ($name eq $id); } print ".","$count"; close DBASSEM; close DBTRIM; close OUT; } print "\nFinished transfering sequence infomation.\n"; closedir DIR; } sub Process { my $y=0; my $z=0; foreach $assem (@ID) { chomp $assem; open (ASSEM, "$assem.assem"); while () { chomp; ($seq_id, $len, $seq) = split (/\t/, $_); } $y++; &Primer_target(1, $seq, $len, $target[$z], $exclude[$z]); `primer3_core < $seq_id.primer.1 > $seq_id.plist.1`; &Table_heading() if ((-e "$assem_dir.out") == 0); &Primer_table($y, "$seq_id.plist.1", $len, "", $target[$z], $exclude[$z]); $z++; print "=","$y"; } } sub Clean_all { unlink $assembly_all; } sub Clean_others { `cp "$assem_dir.out" ".."`; # ` rm *.*`; } sub Primer_target { my ($n, $sequence, $seq_len, $p_target, $p_exclude) = @_; open (LIST, ">$seq_id.primer.$n"); print LIST "PRIMER_SEQUENCE_ID=$seq_id\n"; print LIST "SEQUENCE=$sequence\n"; print LIST "PRIMER_COMMENT=$seq_len\n"; print LIST "TARGET=", $p_target, "\n"; print LIST "EXCLUDED_REGION=", $p_exclude, "\n"; print LIST "PRIMER_PRODUCT_SIZE_RANGE=100-400\n"; print LIST "PRIMER_MAX_POLY_X=3\n"; print LIST "=\n"; close LIST; } sub Table_heading { my $heading = "Input_Record\tID\tLength\tTarget\tExclude\tPair_No\tPrimer_L_Seq\tPrimer_R_Seq\tPrimer_L_Pos_Len\tPrimer_R_Pos_Len\tPrimer_L_Tm\tPrimer_R_Tm\tPrimer_L_GC\tPrimer_R_GC\tProduct_Size\n"; open (TH, ">$assem_dir.out"); print TH $heading; close TH; } sub Primer_table { my ($record, $input, $t_len, $t_ssr, $t_target, $t_exclude) = @_; my $t_id=""; open (PT, ">>$assem_dir.out"); open (IN, "$input") if (-e "$input"); while () { if (/^PRIMER_SEQUENCE_ID=(\S+)/) { $t_id = $1; print PT "$record\t"; print PT "$t_id\t"; print PT "$t_len\t"; print PT "$t_target\t"; print PT "$t_exclude\t"; } elsif (/^PRIMER_LEFT_SEQUENCE=(\S+)/) { print PT "1\t"; print PT "$1\t"; } elsif (/^PRIMER_RIGHT_SEQUENCE=(\S+)/) { print PT "$1\t"; } elsif (/^PRIMER_LEFT=(\d+\,\d+)/) { print PT "$1\t"; } elsif (/^PRIMER_RIGHT=(\d+\,\d+)/) { print PT "$1\t"; } elsif (/^PRIMER_LEFT_TM=(\d+\.\d+)/) { print PT "$1\t"; } elsif (/^PRIMER_RIGHT_TM=(\d+\.\d+)/) { print PT "$1\t"; } elsif (/^PRIMER_LEFT_GC_PERCENT=(\d+\.\d+)/) { print PT "$1\t"; } elsif (/^PRIMER_RIGHT_GC_PERCENT=(\d+\.\d+)/) { print PT "$1\t"; } elsif (/^PRIMER_PRODUCT_SIZE=(\d+)/) { print PT "$1\n"; } elsif (/^PRIMER_LEFT_(\d)_SEQUENCE=(\S+)/) { print PT "$record\t"; print PT "$t_id\t"; print PT "$t_len\t"; print PT "$t_target\t"; print PT "$t_exclude\t"; print PT ($1+1), "\t$2\t"; } elsif (/^PRIMER_RIGHT_\d_SEQUENCE=(\S+)/) { print PT "$1\t"; } elsif (/^PRIMER_LEFT_\d=(\d+\,\d+)/) { print PT "$1\t"; } elsif (/^PRIMER_RIGHT_\d=(\d+\,\d+)/) { print PT "$1\t"; } elsif (/^PRIMER_LEFT_\d_TM=(\d+\.\d+)/) { print PT "$1\t"; } elsif (/^PRIMER_RIGHT_\d_TM=(\d+\.\d+)/) { print PT "$1\t"; } elsif (/^PRIMER_LEFT_\d_GC_PERCENT=(\d+\.\d+)/) { print PT "$1\t"; } elsif (/^PRIMER_RIGHT_\d_GC_PERCENT=(\d+\.\d+)/) { print PT "$1\t"; } elsif (/^PRIMER_PRODUCT_SIZE_\d=(\d+)/) { print PT "$1\n"; } } print PT "\n"; close IN; close PT; } 1;