root / Am1.0 / script / gene / make_orf.py @ 71d28e6f
History | View | Annotate | Download (1.8 kB)
1 |
|
---|---|
2 |
# Build `gene`: one entry per 'transcript' row of transcripts.gtf.
#
# Key:   the value of the second attribute in column 9, with the double
#        quotes removed (presumably the transcript_id -- confirm against
#        the GTF producer).
# Value: [column 1, int(column 4), int(column 5), column 7, [], []]
#        The two trailing lists start empty; the GFF3 pass later in this
#        script appends CDS start and end coordinates to them.
gene = {}
with open('transcripts.gtf') as fin:
    for line in fin:
        lst = line.rstrip().split('\t')
        # Comment, blank or truncated lines do not have the nine
        # tab-separated columns indexed below; skip them instead of failing
        # with IndexError (same guard as the GFF3 loop in this script).
        if len(lst) < 9:
            continue
        if lst[2] == 'transcript':
            key = lst[8].split('; ')[1].split()[1].replace('"', '')
            gene[key] = [lst[0], int(lst[3]), int(lst[4]), lst[6], [], []]
8 |
|
9 |
|
10 |
# Second pass: for every 'CDS' row of best_candidates.gff3, look up the entry
# in `gene` keyed by column 1 and append shifted coordinates to its two
# trailing lists.  Rows with fewer than nine tab-separated fields are ignored.
with open('best_candidates.gff3') as gff:
    for record in gff:
        fields = record.rstrip().split('\t')
        if len(fields) >= 9 and fields[2] == 'CDS':
            entry = gene[fields[0]]
            # Columns 4 and 5 are added to the stored transcript start
            # (entry[1]); the start is shifted by -2 and the end by -1.
            entry[4].append(entry[1] + int(fields[3]) - 2)
            entry[5].append(entry[1] + int(fields[4]) - 1)
19 |
|
20 |
|
21 |
# Write one row per transcript that collected at least one CDS interval.
#
#   orf        : 6 tab-separated columns --
#                chrom, transcript start, transcript end, name, id, strand
#   orf.struct : 11 tab-separated columns, in the column order of the
#                orfstruct table created by the SQL printed at the end of
#                this script -- id, chrom, strand, txStart, txEnd, cdsStart,
#                cdsEnd, exonCount, exonStarts, exonEnds, name
#
# Ids are consecutive integers starting at 1, assigned in dict iteration
# order.  Both files are closed when the `with` blocks exit, including when a
# write raises.
orf_id = 1
with open('orf', 'w') as fout:
    with open('orf.struct', 'w') as fout2:
        for n in gene:
            chrom, span_start, span_end, strand, starts, ends = gene[n]
            if not starts:
                # No CDS was recorded for this transcript.
                continue
            fout.write('{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n'.format(
                chrom, span_start, span_end, n, orf_id, strand))
            # The reported span must enclose every listed interval: smallest
            # start to largest end.  (max/min here would give a span that
            # misses intervals, or start > stop, whenever there are several.)
            start = min(starts)
            stop = max(ends)
            fout2.write('{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\n'.format(
                orf_id,
                chrom,
                strand,
                start,
                stop,
                start,
                stop,
                len(starts),
                ','.join(str(i) for i in starts),
                ','.join(str(i) for i in ends),
                n))
            orf_id += 1
49 |
|
50 |
|
51 |
import os
import subprocess

# Post-process the 'orf' file with external tools:
#   1. sort by column 1, then numerically by column 2, into the scratch
#      file 'xx'
#   2. replace 'orf' with the sorted copy
#   3. bgzip it (produces orf.gz)
#   4. build a tabix index over orf.gz using the 'bed' preset
#
# check_call raises CalledProcessError on a non-zero exit status, so a failed
# step stops the script instead of letting later steps run on missing or
# partial input.  Argument lists are used so no shell is involved.
with open('xx', 'w') as sorted_out:
    subprocess.check_call(['sort', '-k1,1', '-k2,2n', 'orf'], stdout=sorted_out)
os.rename('xx', 'orf')
subprocess.check_call(['bgzip', 'orf'])
subprocess.check_call(['tabix', '-p', 'bed', 'orf.gz'])
|
56 |
|
57 |
|
58 |
# Print SQL to stdout that recreates the orfstruct table, bulk-loads
# 'orf.struct' into it, and recreates an empty orfsymbol table.
#
# The argument is parenthesised so the line is valid both as a Python 2 print
# statement and as a Python 3 print() call; the emitted text is the same.
print('''
drop table if exists orfstruct;
create table orfstruct (
id int unsigned not null primary key,
chrom varchar(255) not null,
strand char(1) not null,
txStart int unsigned not null,
txEnd int unsigned not null,
cdsStart int unsigned not null,
cdsEnd int unsigned not null,
exonCount int unsigned not null,
exonStarts text not null,
exonEnds text not null,
name varchar(255) not null
);
load data local infile 'orf.struct' into table orfstruct;


drop table if exists orfsymbol;
create table orfsymbol (
name varchar(255) not null,
symbol varchar(255) null,
description text null,
id int unsigned not null primary key,
index(name)
);
''')
|
85 |
|