Statistics
| Branch: | Revision:

root / Am1.0 / script / gene / make_orf.py @ 71d28e6f

History | View | Annotate | Download (1.8 kB)

1 8c368a17 Daofeng Li
2 8c368a17 Daofeng Li
# Transcript records read from transcripts.gtf, keyed by the identifier taken
# from the attribute column. Each value is the list
#   [chrom, start, end, strand, cds_starts, cds_ends]
# where the two trailing lists start empty and are filled in later from the
# CDS rows of best_candidates.gff3.
gene = {}
with open('transcripts.gtf') as fin:
    for line in fin:
        lst = line.rstrip().split('\t')
        # Blank lines and '#' comment lines do not have the nine
        # tab-separated columns; indexing them would raise IndexError.
        if len(lst) < 9:
            continue
        if lst[2] != 'transcript':
            continue
        # The key is the value of the SECOND '; '-separated attribute, with
        # the double quotes removed.
        # NOTE(review): presumably this is transcript_id (Cufflinks-style
        # attribute order) -- confirm against the actual GTF.
        name = lst[8].split('; ')[1].split()[1].replace('"', '')
        gene[name] = [lst[0], int(lst[3]), int(lst[4]), lst[6], [], []]
8 8c368a17 Daofeng Li
9 8c368a17 Daofeng Li
10 8c368a17 Daofeng Li
# Attach the CDS segments listed in best_candidates.gff3 to the transcript
# records in `gene`. Column 1 of each row is used as the key into `gene`; a
# key that is missing there raises KeyError.
with open('best_candidates.gff3') as gff:
    for row in gff:
        fields = row.rstrip().split('\t')
        # Only complete nine-column rows of type CDS are of interest.
        if len(fields) >= 9 and fields[2] == 'CDS':
            record = gene[fields[0]]
            offset = record[1]  # start coordinate of the transcript
            # NOTE(review): the CDS coordinates are shifted by the transcript
            # start, which assumes they are relative to the transcript;
            # the -2 / -1 adjustments are kept exactly as written -- confirm
            # the intended coordinate convention.
            record[4].append(offset + int(fields[3]) - 2)
            record[5].append(offset + int(fields[4]) - 1)
19 8c368a17 Daofeng Li
20 8c368a17 Daofeng Li
21 8c368a17 Daofeng Li
# Write the two output tables for every transcript that has at least one
# CDS segment:
#   orf         chrom, start, end, name, id, strand (one line per transcript)
#   orf.struct  id, chrom, strand, span start, span end, span start,
#               span end, segment count, comma-joined segment starts,
#               comma-joined segment ends, name
# The orf.struct column order matches the `orfstruct` table created by the
# SQL this script prints.
# Ids are assigned sequentially from 1 in dictionary iteration order.
orf_id = 1  # renamed from `id` so the builtin is not shadowed
# The `with` blocks close both files even if a write fails, and guarantee
# they are flushed before any later step reads them.
with open('orf', 'w') as fout:
    with open('orf.struct', 'w') as fout2:
        for n in gene:
            chrom, tx_start, tx_end, strand, cds_starts, cds_ends = gene[n]
            if not cds_starts:
                continue
            fout.write('{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n'.format(
                chrom, tx_start, tx_end, n, orf_id, strand))
            # The overall span runs from the leftmost segment start to the
            # rightmost segment end. (The previous max(starts)/min(ends)
            # gave an inverted span whenever there was more than one
            # segment.)
            start = min(cds_starts)
            stop = max(cds_ends)
            fout2.write(
                '{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\n'.format(
                    orf_id,
                    chrom,
                    strand,
                    start,
                    stop,
                    start,
                    stop,
                    len(cds_starts),
                    ','.join([str(i) for i in cds_starts]),
                    ','.join([str(i) for i in cds_ends]),
                    n))
            orf_id += 1
49 8c368a17 Daofeng Li
50 8c368a17 Daofeng Li
51 8c368a17 Daofeng Li
import os
import subprocess

# Sort 'orf' by column 1, then numerically by column 2, replace the file
# with the sorted copy, compress it with bgzip and build a tabix index
# using the 'bed' preset.
# check_call raises CalledProcessError on a non-zero exit status, so a
# failed step stops the pipeline instead of letting the following steps run
# on missing or stale files. Argument lists avoid going through a shell.
subprocess.check_call(['sort', '-k1,1', '-k2,2n', '-o', 'xx', 'orf'])
os.rename('xx', 'orf')
subprocess.check_call(['bgzip', 'orf'])
subprocess.check_call(['tabix', '-p', 'bed', 'orf.gz'])
56 8c368a17 Daofeng Li
57 8c368a17 Daofeng Li
58 8c368a17 Daofeng Li
# Emit the SQL that (re)creates the `orfstruct` table and loads 'orf.struct'
# into it, and (re)creates the empty `orfsymbol` table.
# The parenthesised single-argument form prints the same text under both
# Python 2 and Python 3; the bare `print` statement is a syntax error on 3.
print('''
drop table if exists orfstruct;
create table orfstruct (
id int unsigned not null primary key,
chrom varchar(255) not null,
strand char(1) not null,
txStart int unsigned not null,
txEnd int unsigned not null,
cdsStart int unsigned not null,
cdsEnd int unsigned not null,
exonCount int unsigned not null,
exonStarts text not null,
exonEnds text not null,
name varchar(255) not null
);
load data local infile 'orf.struct' into table orfstruct;


drop table if exists orfsymbol;
create table orfsymbol (
name varchar(255) not null,
symbol varchar(255) null,
description text null,
id int unsigned not null primary key,
index(name)
);
''')