Statistics
| Branch: | Revision:

root / Am1.0 / script / gene / make_glean.py @ 71d28e6f

History | View | Annotate | Download (1.5 kB)

1 8c368a17 Daofeng Li
# Parse the Glean GFF into `gene`, keyed by the value of the first attribute
# in column 9 (presumably ID=<name> on mRNA rows and Parent=<name> on their
# sub-feature rows -- confirm against the input file).
#
# Each value is a list:
#   [seqid, start - 1, end (kept as str), strand,
#    [sub-feature starts - 1], [sub-feature ends]]
# GFF starts are 1-based, so 1 is subtracted to get 0-based starts; ends are
# left untouched.
gene = {}
with open('snapdragon.scafseq.fa.Glean.filter.final.gff') as fin:
    for raw in fin:
        record = raw.rstrip()
        # Blank lines and '#' directives/comments (e.g. '##gff-version 3')
        # have no ninth column and would otherwise raise IndexError below.
        if not record or record.startswith('#'):
            continue
        cols = record.split('\t')
        name = cols[8].split('=')[1].split(';')[0]
        if cols[2] == 'mRNA':
            gene[name] = [cols[0], int(cols[3]) - 1, cols[4], cols[6], [], []]
        else:
            # Every non-mRNA row is attached to its transcript; this raises
            # KeyError if the row appears before the mRNA row it refers to.
            gene[name][4].append(int(cols[3]) - 1)
            gene[name][5].append(int(cols[4]))
11 8c368a17 Daofeng Li
12 8c368a17 Daofeng Li
# Write one tab-separated row per transcript to two files:
#   Glean       -> seqid, start, end, name, id, strand
#   Gleanstruct -> id, seqid, strand, start, end, min sub-feature start,
#                  max sub-feature end, sub-feature count,
#                  comma-joined starts, comma-joined ends, name
# The same 1-based running id is used for a transcript in both files.
# `with` guarantees both files are flushed and closed even if a row fails,
# e.g. min()/max() raise ValueError for a transcript with no sub-features.
with open('Glean', 'w') as fout:
    with open('Gleanstruct', 'w') as fout2:
        for gene_id, n in enumerate(gene, 1):
            seqid, tx_start, tx_end, strand, starts, ends = gene[n]
            bed_row = (seqid, tx_start, tx_end, n, gene_id, strand)
            fout.write('\t'.join(str(v) for v in bed_row) + '\n')
            struct_row = (
                gene_id, seqid, strand, tx_start, tx_end,
                min(starts),
                max(ends),
                len(starts),
                ','.join(str(i) for i in starts),
                ','.join(str(i) for i in ends),
                n,
            )
            fout2.write('\t'.join(str(v) for v in struct_row) + '\n')
29 8c368a17 Daofeng Li
30 8c368a17 Daofeng Li
import os
import subprocess

# Post-process the Glean file: sort, compress, index.
# check_call raises CalledProcessError on a non-zero exit status, so a failed
# step stops the pipeline instead of letting later steps run on bad data
# (previously a failed sort was still followed by `mv xx Glean`).
# Argument lists are used so no shell is involved.

# Sort by column 1, then numerically by column 2, into the scratch file 'xx'.
subprocess.check_call(['sort', '-k1,1', '-k2,2n', '-o', 'xx', 'Glean'])
# Replace the unsorted file only once the sort has succeeded.
os.rename('xx', 'Glean')
# bgzip compresses Glean to Glean.gz; tabix then indexes it as BED.
subprocess.check_call(['bgzip', 'Glean'])
subprocess.check_call(['tabix', '-p', 'bed', 'Glean.gz'])
35 8c368a17 Daofeng Li
36 8c368a17 Daofeng Li
# Emit the SQL that (re)creates the Gleanstruct and Gleansymbol tables and
# loads the struct file written earlier by this script.
# The infile name must match the file this script actually writes, which is
# 'Gleanstruct' (no dot); the previous 'Glean.struct' named a file that is
# never created.
# Gleansymbol is only created here; nothing in this script loads it.
# The parenthesised form prints identically under Python 2 and Python 3.
print('''
drop table if exists Gleanstruct;
create table Gleanstruct (
id int unsigned not null primary key,
chrom varchar(255) not null,
strand char(1) not null,
txStart int unsigned not null,
txEnd int unsigned not null,
cdsStart int unsigned not null,
cdsEnd int unsigned not null,
exonCount int unsigned not null,
exonStarts text not null,
exonEnds text not null,
name varchar(255) not null
);
load data local infile 'Gleanstruct' into table Gleanstruct;

drop table if exists Gleansymbol;
create table Gleansymbol (
name varchar(255) not null,
symbol varchar(255) null,
description text null,
id int unsigned not null primary key,
index(name)
);

''')