#!/usr/local/bin/perl
# Perl convertor MDL .mol to CML - Version 1.4 - schema version
#
# Usage: <this script> NAME [AUTHOR [DATE]]
#
# Reads NAME.mol, marks its atom and bond lines up as CML and writes the
# result to NAME_s.xml.  AUTHOR and DATE are optional; they are copied into
# the generated comment and element ids and default to the empty string.
#
# NOTE(review): NAME, AUTHOR and DATE are inserted into the XML verbatim
# (no escaping), exactly as in earlier versions of this script.

use strict;
use warnings;

###########################
##   Read in the input   ##
###########################

# Entry point.  Takes the command-line arguments (NAME, AUTHOR, DATE).
# Prints a message and returns without writing anything when no name is
# given or NAME.mol does not exist; dies when a file cannot be read or
# written.
sub main {
    my ($file, $author, $date) = @_;

    # make sure input file is defined
    if (!defined $file) {
        print "No filename supplied.";
        return;
    }

    # locate the file and define the output file
    my $input_file  = $file . '.mol';
    my $output_file = $file . '_s.xml';

    # make sure input file is present
    if (!-e $input_file) {
        print "The file $input_file doesn't exist.";
        return;
    }

    # read the entire input file into one scalar
    open my $in, '<', $input_file
        or die "Cannot read $input_file: $!\n";
    my $mol_text = do { local $/; <$in> };
    close $in
        or die "Cannot close $input_file: $!\n";
    $mol_text = '' unless defined $mol_text;

    my $cml = _mol_to_cml($mol_text, $file, $author, $date);

    # write the final version to file
    open my $out, '>', $output_file
        or die "Cannot write $output_file: $!\n";
    print {$out} $cml
        or die "Cannot write $output_file: $!\n";
    close $out
        or die "Cannot close $output_file: $!\n";

    return;
}

############################
##   Sort out the input   ##
############################

# Convert the text of an MDL .mol file into a CML document.
#
#   $mol_text - complete contents of the .mol file
#   $file     - name used for the titles and as the first part of every id
#   $author   - second part of every id (optional, defaults to '')
#   $date     - only used in the leading comment (optional, defaults to '')
#
# Returns the CML document as a string.
sub _mol_to_cml {
    my ($mol_text, $file, $author, $date) = @_;

    $author = '' unless defined $author;
    $date   = '' unless defined $date;
    my $id_prefix = "${file}_${author}";

    # Drop up to five trailing newlines (the original script chomped the
    # text five times).
    my $text = $mol_text;
    $text =~ s/\n{1,5}\z//;

    # Add info head to cml at the beginning of the file
    $text = join("\n",
        '<?xml version="1.0"?>',
        '<!--<?xml-stylesheet type="text/xsl" href="generic.xsl" ?>-->',
        '',
        '<document>',
        "<!-- CML document - $file - $author - $date -->",
        '<!-- file converted from: MDL .mol -->',
        qq{<cml title="$file" id="cml_$id_prefix" xmlns="x-schema:cml_schema_ie_02.xml">},
        qq{\t<molecule title="$file" id="mol_$id_prefix">},
        "\t\t<atomArray>",
        '') . $text;

    # Markup atom data: a line holding three numbers followed by letters.
    # Atoms are numbered 1, 2, 3 ... in the order they appear.
    my $atom_no = 0;
    $text =~ s{
        (?=\n) \s+
        ([-\d.]+) \s+       # x
        ([-\d.]+) \s+       # y
        ([-\d.]+) \s+       # z
        ([A-Za-z]+)         # element symbol
        [^\n]* (?=\n)       # the rest of the line is discarded
    }{ _atom_xml($id_prefix . '_a_' . ++$atom_no, $1, $2, $3, $4) }gex;

    # Get rid of gumph at beginning: everything between <atomArray> and the
    # first <atom (the .mol header and counts lines).
    $text =~ s{<atomArray>[^<]*<atom}{<atomArray>\n\t\t\t<atom}s;

    # Markup bond data: a line starting with two integers and a number.
    # This must run after the header has been removed, because the counts
    # line has the same shape as a bond line.
    my $bond_no = 0;
    $text =~ s{
        (?=\n) \s+
        (\d+) \s+           # number of the first atom
        (\d+) \s+           # number of the second atom
        ([\d.]+)            # bond order
        [^\n]*              # the rest of the line is discarded
    }{ _bond_xml($id_prefix . '_b_' . ++$bond_no,
                 $id_prefix . '_a_' . $1,
                 $id_prefix . '_a_' . $2,
                 $3) }gex;

    # Put end tags at the end of the file (before one final newline, if
    # any is left, which is where a plain "$" anchor matches).
    $text =~ s{(?=\n?\z)}{\n\t\t</bondArray>\n\t</molecule>\n</cml>\n</document>};

    # Put tags in the middle between atom and bond
    my $joined = $text =~ s{</atom>\s+<bond}{</atom>\n\t\t</atomArray>\n\t\t<bondArray>\n\t\t\t<bond}s;
    if (!$joined) {
        # No bond directly follows an atom (e.g. a molecule without bonds):
        # still close the atom array and open the bond array after the last
        # atom, so the end tags added above have matching start tags.
        $text =~ s{\A(.*</atom>)}{$1\n\t\t</atomArray>\n\t\t<bondArray>}s;
    }

    # Delete M  END at the end of the file
    $text =~ s{\nM\s+END}{};

    # remove stupid bond orders: an order of 0 is written as 1
    $text =~ s{(<string builtin="order" convention="MDL">)0(</string>)}{${1}1$2}g;

    return $text;
}

# Build the <atom> element for one atom line.  The result starts with a
# newline and has no trailing newline.
sub _atom_xml {
    my ($id, $x, $y, $z, $element) = @_;

    return join "\n",
        '',
        qq{\t\t\t<atom id="$id">},
        qq{\t\t\t\t<float builtin="x3" units="A">$x</float>},
        qq{\t\t\t\t<float builtin="y3" units="A">$y</float>},
        qq{\t\t\t\t<float builtin="z3" units="A">$z</float>},
        qq{\t\t\t\t<string builtin="elementType">$element</string>},
        "\t\t\t</atom>";
}

# Build the <bond> element for one bond line.  $from_ref and $to_ref are
# the ids of the two atoms.  The result starts with a newline and has no
# trailing newline.
sub _bond_xml {
    my ($id, $from_ref, $to_ref, $order) = @_;

    return join "\n",
        '',
        qq{\t\t\t<bond id="$id">},
        qq{\t\t\t\t<string builtin="atomRef">$from_ref</string>},
        qq{\t\t\t\t<string builtin="atomRef">$to_ref</string>},
        qq{\t\t\t\t<string builtin="order" convention="MDL">$order</string>},
        "\t\t\t</bond>";
}

main(@ARGV);