Statistics
| Branch: | Revision:

root / Am1.0 / script / gene / make_orf.py @ 8c368a17

History | View | Annotate | Download (1.8 kB)

1

    
2
# Index every 'transcript' record of transcripts.gtf by its ID.
#   gene[transcript_id] = [chrom, start, end, strand, cds_starts, cds_ends]
# start/end are the integer GTF columns 4 and 5; the two trailing lists start
# empty and are appended to by the CDS pass over best_candidates.gff3.
gene = {}
with open('transcripts.gtf') as fin:
    for line in fin:
        fields = line.rstrip().split('\t')
        # Skip blank, comment and truncated lines (same guard as the GFF3
        # pass) instead of failing with IndexError on fields[2].
        if len(fields) < 9 or fields[2] != 'transcript':
            continue
        # The ID is the unquoted value of the SECOND '; '-separated attribute.
        # NOTE(review): presumably that attribute is transcript_id (as in
        # Cufflinks-style GTF) -- confirm against the input files.
        transcript_id = fields[8].split('; ')[1].split()[1].replace('"', '')
        gene[transcript_id] = [
            fields[0],
            int(fields[3]),
            int(fields[4]),
            fields[6],
            [],
            [],
        ]
8

    
9

    
10
# Collect CDS intervals from best_candidates.gff3 onto the matching transcript.
# Column 1 of each CDS row is used as the key into `gene`, and the CDS start/end
# (columns 4 and 5) are shifted by that transcript's start coordinate.
# NOTE(review): the -2 / -1 offsets presumably turn two 1-based coordinates
# into a 0-based half-open interval, and nothing here accounts for strand or
# introns -- confirm this is intended.
with open('best_candidates.gff3') as gff:
    for row in gff:
        cols = row.rstrip().split('\t')
        # Only complete (>= 9 column) CDS rows contribute.
        if len(cols) >= 9 and cols[2] == 'CDS':
            record = gene[cols[0]]
            tx_start = record[1]
            record[4].append(tx_start + int(cols[3]) - 2)
            record[5].append(tx_start + int(cols[4]) - 1)
19

    
20

    
21
# Write one row per transcript that has at least one CDS interval:
#   'orf'        : chrom, transcript start, transcript end, name, id, strand
#   'orf.struct' : id, chrom, strand, txStart, txEnd, cdsStart, cdsEnd,
#                  exonCount, exonStarts, exonEnds, name
# Both files are tab-separated; ids are assigned sequentially from 1 in
# dictionary iteration order. The `with` block guarantees both files are
# closed even if a write fails.
with open('orf', 'w') as fout, open('orf.struct', 'w') as fout2:
    orf_id = 1  # named orf_id rather than `id` to avoid shadowing the builtin
    for name in gene:
        chrom, tx_start, tx_end, strand, cds_starts, cds_ends = gene[name]
        if not cds_starts:
            continue
        fout.write('{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n'.format(
            chrom, tx_start, tx_end, name, orf_id, strand))
        # The reported span must enclose every interval: smallest start and
        # largest end. (The previous max(starts)/min(ends) produced
        # start > end whenever a transcript had more than one CDS.)
        start = min(cds_starts)
        stop = max(cds_ends)
        fout2.write('{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\n'.format(
            orf_id,
            chrom,
            strand,
            start,
            stop,
            start,
            stop,
            len(cds_starts),
            ','.join([str(i) for i in cds_starts]),
            ','.join([str(i) for i in cds_ends]),
            name))
        orf_id += 1
49

    
50

    
51
import os
import subprocess

# Sort 'orf' by column 1, then numerically by column 2, compress it with bgzip
# and build a tabix index using the BED preset.
# check_call raises CalledProcessError on a non-zero exit status, so a failed
# step stops the pipeline instead of silently replacing 'orf' with a partial
# file and carrying on (os.system ignored the exit codes).
with open('xx', 'w') as sorted_out:
    subprocess.check_call(['sort', '-k1,1', '-k2,2n', 'orf'], stdout=sorted_out)
os.rename('xx', 'orf')
subprocess.check_call(['bgzip', 'orf'])
subprocess.check_call(['tabix', '-p', 'bed', 'orf.gz'])
56

    
57

    
58
# SQL emitted on stdout: recreates the `orfstruct` table (11 columns, matching
# the 11 tab-separated fields of 'orf.struct') and loads that file into it,
# then recreates the `orfsymbol` table (no rows are loaded into it here).
SCHEMA_SQL = '''
drop table if exists orfstruct;
create table orfstruct (
id int unsigned not null primary key,
chrom varchar(255) not null,
strand char(1) not null,
txStart int unsigned not null,
txEnd int unsigned not null,
cdsStart int unsigned not null,
cdsEnd int unsigned not null,
exonCount int unsigned not null,
exonStarts text not null,
exonEnds text not null,
name varchar(255) not null
);
load data local infile 'orf.struct' into table orfstruct;


drop table if exists orfsymbol;
create table orfsymbol (
name varchar(255) not null,
symbol varchar(255) null,
description text null,
id int unsigned not null primary key,
index(name)
);
'''

# Parenthesised single-argument form: prints the same text on Python 2 and is
# also valid syntax on Python 3 (the bare `print '...'` statement is not).
print(SCHEMA_SQL)
85