Skip to content

Commit e0df9cf

Browse files
committed
Gene expression class
1 parent 8f3bc2b commit e0df9cf

File tree

1 file changed

+392
-0
lines changed

1 file changed

+392
-0
lines changed

Diff for: ApobecExp.Expression.cls

+392
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,392 @@
1+
///
2+
/// Class for the analysis of distribution of APOBEC mutations
3+
/// relative to the gene expression
4+
///
5+
Class ApobecExp.Expression [ Abstract ]
6+
{
7+
8+
/// Set the maximum end of genes among intersecting genes
9+
/// for using in interval tree structure
10+
ClassMethod SetGeneIntervals()
11+
{
12+
set IndexName = "IndexGene"
13+
set chr = $order(^ApobecExp.GeneI(IndexName,""))
14+
while (chr '= "")
15+
{
16+
set maxendpos = 0
17+
set startpos = $order(^ApobecExp.GeneI(IndexName,chr,""))
18+
while (startpos '= "")
19+
{
20+
set endpos = $order(^ApobecExp.GeneI(IndexName,chr,startpos,""))
21+
while (endpos '= "")
22+
{
23+
set ID = $order(^ApobecExp.GeneI(IndexName,chr,startpos,endpos,""))
24+
if (maxendpos >= startpos) {
25+
set ^ApobecExp.GeneI(IndexName,chr,startpos,endpos,ID) = maxendpos }
26+
else {
27+
set ^ApobecExp.GeneI(IndexName,chr,startpos,endpos,ID) = 0 }
28+
if (maxendpos < endpos) {
29+
set maxendpos = endpos }
30+
set endpos = $order(^ApobecExp.GeneI(IndexName,chr,startpos,endpos))
31+
}
32+
set startpos = $order(^ApobecExp.GeneI(IndexName,chr,startpos))
33+
}
34+
set chr = $order(^ApobecExp.GeneI(IndexName,chr))
35+
}
36+
}
37+
38+
/// Function returns list of genes covering
39+
/// particular position on particular chromosome
40+
ClassMethod GetGeneList(chr As %String, pos As %Integer) As %Library.List
41+
{
42+
set ret = ""
43+
set IndexName = "IndexGene"
44+
set chrI = " "_$zconvert(chr,"U")
45+
set startpos = pos
46+
if ($data(^ApobecExp.GeneI(IndexName,chrI,pos)) '= 0) {
47+
set startpos = pos }
48+
else {
49+
set startpos = $order(^ApobecExp.GeneI(IndexName,chrI,pos),-1) }
50+
while (startpos '= "")
51+
{
52+
set endpos = $order(^ApobecExp.GeneI(IndexName,chrI,startpos,""))
53+
set maxend = 0
54+
while (endpos '= "")
55+
{
56+
set ID = $order(^ApobecExp.GeneI(IndexName,chrI,startpos,endpos,""))
57+
if (pos <= endpos) {
58+
set $list(ret,*+1) = ID }
59+
set endinterval = ^ApobecExp.GeneI(IndexName,chrI,startpos,endpos,ID)
60+
if (endinterval > maxend) {
61+
set maxend = endinterval }
62+
set endpos = $order(^ApobecExp.GeneI(IndexName,chrI,startpos,endpos))
63+
}
64+
set endinterval = maxend
65+
if ((endinterval = 0) || (endinterval < pos)) quit
66+
set startpos = $order(^ApobecExp.GeneI(IndexName,chrI,startpos),-1)
67+
}
68+
return ret
69+
}
70+
71+
/// Function returns maximum expression value among genes
72+
/// covering particular position on particular chromosome
73+
/// return: -1 if position in gene but no expression data,
74+
/// -2 if position not in gene
75+
ClassMethod GetMaxExpression(cancer As %String, sample As %String, chr As %String, pos As %Integer)
76+
{
77+
set genelist = ..GetGeneList(chr,pos)
78+
79+
if (genelist '= "") {
80+
set n = $listlength(genelist) }
81+
else {
82+
return (-2) }
83+
set maxexp = -1
84+
for j=1:1:n
85+
{
86+
set GeneID = $listget(^ApobecExp.GeneD($listget(genelist,j)),2)
87+
if ($data(^exp(cancer,sample,GeneID)) '= 0)
88+
{
89+
if (maxexp < ^exp(cancer,sample,GeneID)) {
90+
set maxexp = ^exp(cancer,sample,GeneID) }
91+
}
92+
}
93+
94+
return (maxexp)
95+
}
96+
97+
/// Function returns expression bin number
98+
/// based on particular expression value
99+
ClassMethod GetExpressionBin(ExpressionValue As %Float) As %Integer
100+
{
101+
if (ExpressionValue < 0) {
102+
return ExpressionValue }
103+
set leftval = $order(^expBins(ExpressionValue),-1)
104+
set rightval = $order(^expBins(leftval,""))
105+
if ((ExpressionValue > leftval) && (ExpressionValue <= rightval)) {
106+
return (^expBins(leftval,rightval)) }
107+
else {
108+
return "" }
109+
}
110+
111+
/// Function calculates number of APOBEC and other mutations
112+
/// relative to expression bins
113+
ClassMethod GetExpMutation()
114+
{
115+
kill ^expResultsAPOBEC
116+
kill ^expResultsOther
117+
for i = 1:1:^cancerType
118+
{
119+
set cnt = 0
120+
set cancer = ^cancerType(i)
121+
set sample = $order(^mutation(cancer,""))
122+
while (sample '= "")
123+
{
124+
for j = -2:1:^expBins
125+
{
126+
set ^expResultsAPOBEC(cancer,sample,j) = 0
127+
set ^expResultsOther(cancer,sample,j) = 0
128+
}
129+
set chr = $order(^mutation(cancer,sample,""))
130+
while (chr '= "")
131+
{
132+
set pos = $order(^mutation(cancer,sample,chr,""))
133+
while (pos '= "")
134+
{
135+
set isAPOBEC = $listget(^mutation(cancer,sample,chr,pos),3)
136+
137+
set maxexp = ..GetMaxExpression(cancer,sample,chr,pos)
138+
139+
set expbin = ..GetExpressionBin(maxexp)
140+
141+
if (isAPOBEC = 1) {
142+
set ^expResultsAPOBEC(cancer,sample,expbin) = ^expResultsAPOBEC(cancer,sample,expbin) + 1 }
143+
else {
144+
set ^expResultsOther(cancer,sample,expbin) = ^expResultsOther(cancer,sample,expbin) + 1 }
145+
146+
set cnt = cnt + 1
147+
148+
set pos = $order(^mutation(cancer,sample,chr,pos))
149+
}
150+
set chr = $order(^mutation(cancer,sample,chr))
151+
}
152+
set sample = $order(^mutation(cancer,sample))
153+
}
154+
write cancer,"=",cnt,!
155+
}
156+
}
157+
158+
/// Function calculates and saves number of TCW motifs
159+
/// (a potential APOBEC target) in genes
160+
ClassMethod SetTCWinGenes()
161+
{
162+
kill ^TCWinGenes
163+
set ^TCWinGenes = 0
164+
set IndexName = "IndexGene"
165+
set chr = $order(^ApobecExp.GeneI(IndexName,""))
166+
while (chr '= "")
167+
{
168+
#;write chr,!
169+
set startpos = $order(^ApobecExp.GeneI(IndexName,chr,""))
170+
while (startpos '= "")
171+
{
172+
#;write startpos,!
173+
set endpos = $order(^ApobecExp.GeneI(IndexName,chr,startpos,""))
174+
while (endpos '= "")
175+
{
176+
write chr," ",startpos," ",endpos,!
177+
set chrlow = $extract(chr,2,*)
178+
set chrlow = $zconvert(chrlow,"L")
179+
set chrlow = ^chrname(chrlow)
180+
181+
if ($data(^TCW(chrlow,startpos)) '= 0) {
182+
set pos = startpos }
183+
else {
184+
set pos = $order(^TCW(chrlow,startpos)) }
185+
while ((pos '= "") && (pos <= endpos))
186+
{
187+
set ^TCWinGenes(chrlow,pos) = ""
188+
set ^TCWinGenes = ^TCWinGenes + 1
189+
set pos = $order(^TCW(chrlow,pos))
190+
}
191+
192+
set endpos = $order(^ApobecExp.GeneI(IndexName,chr,startpos,endpos))
193+
}
194+
195+
set startpos = $order(^ApobecExp.GeneI(IndexName,chr,startpos))
196+
}
197+
set chr = $order(^ApobecExp.GeneI(IndexName,chr))
198+
}
199+
}
200+
201+
/// Function calculates number of TCW motifs
202+
/// in expression bins
203+
ClassMethod GetExpTCW()
204+
{
205+
//kill ^expTCW
206+
for i = 1:1:^cancerType
207+
{
208+
set cancer = ^cancerType(i)
209+
set sample = $order(^mutation(cancer,""))
210+
while (sample '= "")
211+
{
212+
if ($data(^expTCW(cancer,sample,0)) '= 0)
213+
{
214+
write cancer," ",sample," Done",!
215+
set sample = $order(^mutation(cancer,sample))
216+
continue
217+
}
218+
for j = -2:1:7 {
219+
set ^expTCW(cancer,sample,j) = 0 }
220+
set chr = $order(^TCWinGenes(""))
221+
while (chr '= "")
222+
{
223+
set cnt = 0
224+
set pos = $order(^TCWinGenes(chr,""))
225+
while (pos '= "")
226+
{
227+
set maxexp = ..GetMaxExpression(cancer,sample,chr,pos)
228+
set expbin = ..GetExpressionBin(maxexp)
229+
set ^expTCW(cancer,sample,expbin) = ^expTCW(cancer,sample,expbin) + 1
230+
set cnt = cnt + 1
231+
232+
set pos = $order(^TCWinGenes(chr,pos))
233+
}
234+
write chr," ",cnt,!
235+
set chr = $order(^TCWinGenes(chr))
236+
}
237+
238+
write cancer," ",sample,!
239+
set sample = $order(^mutation(cancer,sample))
240+
}
241+
}
242+
}
243+
244+
/// Function calculates number of bases
245+
/// in expression bins
246+
ClassMethod GetExpAll()
247+
{
248+
//kill ^expAll
249+
250+
for i = 1:1:^cancerType
251+
{
252+
set cancer = ^cancerType(i)
253+
set sample = $order(^mutation(cancer,""))
254+
while (sample '= "")
255+
{
256+
if ($data(^expAll(cancer,sample,0)) '= 0)
257+
{
258+
write cancer," ",sample," Done",!
259+
set sample = $order(^mutation(cancer,sample))
260+
continue
261+
}
262+
for j = -2:1:7 {
263+
set ^expAll(cancer,sample,j) = 0 }
264+
write cancer," ",sample,!
265+
266+
write "Interval part",!
267+
set chr = $order(^geneNonoverlap(""))
268+
while (chr '= "")
269+
{
270+
set startpos = $order(^geneNonoverlap(chr,""))
271+
while (startpos '= "")
272+
{
273+
set endpos = $order(^geneNonoverlap(chr,startpos,""))
274+
while (endpos '= "")
275+
{
276+
set pos = startpos
277+
set maxexp = ..GetMaxExpression(cancer,sample,chr,pos)
278+
set expbin = ..GetExpressionBin(maxexp)
279+
set ^expAll(cancer,sample,expbin) = ^expAll(cancer,sample,expbin) + (endpos - startpos + 1)
280+
281+
set endpos = $order(^geneNonoverlap(chr,startpos,endpos))
282+
}
283+
set startpos = $order(^geneNonoverlap(chr,startpos))
284+
}
285+
set chr = $order(^geneNonoverlap(chr))
286+
}
287+
288+
write "Overlapped part",!
289+
set chr = $order(^geneOverlap(""))
290+
while (chr '= "")
291+
{
292+
set pos = $order(^geneOverlap(chr,""))
293+
while (pos '= "")
294+
{
295+
set maxexp = ..GetMaxExpression(cancer,sample,chr,pos)
296+
set expbin = ..GetExpressionBin(maxexp)
297+
set ^expAll(cancer,sample,expbin) = ^expAll(cancer,sample,expbin) + 1
298+
set pos = $order(^geneOverlap(chr,pos))
299+
}
300+
set chr = $order(^geneOverlap(chr))
301+
}
302+
303+
write cancer," ",sample,!
304+
set sample = $order(^mutation(cancer,sample))
305+
}
306+
}
307+
}
308+
309+
/// Function divide DNA positions covering by genes
310+
/// into intervals covered by only single gene
311+
/// and intervals covered by multiple genes.
312+
/// Results are saved in globals.
313+
ClassMethod DivideGenesIntoIntervals()
314+
{
315+
kill ^geneNonoverlap
316+
set ^geneNonoverlap = 0
317+
kill ^geneOverlap
318+
set ^geneOverlap = 0
319+
set IndexName = "IndexGene"
320+
set chr = $order(^ApobecExp.GeneI(IndexName,""))
321+
while (chr '= "")
322+
{
323+
set chrlow = $zconvert($extract(chr,2,*),"L")
324+
set chrlow = ^chrname(chrlow)
325+
set startpos = $order(^ApobecExp.GeneI(IndexName,chr,""))
326+
while (startpos '= "")
327+
{
328+
set endpos = $order(^ApobecExp.GeneI(IndexName,chr,startpos,""))
329+
while (endpos '= "")
330+
{
331+
set prevn = 0
332+
for i = startpos:1:endpos
333+
{
334+
set genelist = ..GetGeneList(chrlow,i)
335+
336+
if (genelist '= "") {
337+
set n = $listlength(genelist) }
338+
else {
339+
set n = 0 }
340+
341+
if (n = 0)
342+
{
343+
write "error: no gene, chr=",chrlow,", pos=",i,!
344+
return
345+
}
346+
elseif (n > 1)
347+
{
348+
set ^geneOverlap(chrlow,i) = ""
349+
set ^geneOverlap = ^geneOverlap + 1
350+
if (prevn = 1)
351+
{
352+
set ^geneNonoverlap(chrlow,istart,iend) = ""
353+
set ^geneNonoverlap = ^geneNonoverlap + 1
354+
}
355+
set prevn = 2
356+
}
357+
elseif(n = 1)
358+
{
359+
if (prevn = 1)
360+
{
361+
set iend = iend + 1
362+
if (iend '= i)
363+
{
364+
write "error: iend '= i, chr=",chr,", startpos=",startpos,", endpos=",endpos,!
365+
return 0
366+
}
367+
if (i = endpos)
368+
{
369+
set ^geneNonoverlap(chrlow,istart,iend) = ""
370+
set ^geneNonoverlap = ^geneNonoverlap + 1
371+
set prevn = 0
372+
}
373+
}
374+
else
375+
{
376+
set istart = i
377+
set iend = i
378+
}
379+
set prevn = 1
380+
}
381+
}
382+
383+
set endpos = $order(^ApobecExp.GeneI(IndexName,chr,startpos,endpos))
384+
}
385+
set startpos = $order(^ApobecExp.GeneI(IndexName,chr,startpos))
386+
}
387+
write chr,!
388+
set chr = $order(^ApobecExp.GeneI(IndexName,chr))
389+
}
390+
}
391+
392+
}

0 commit comments

Comments
 (0)