Skip to content

Commit 7b0fbfb

Browse files
authored
Merge pull request #72 from sbslee/0.38.0-dev
0.38.0 dev
2 parents 4b84de8 + 18845cd commit 7b0fbfb

File tree

4 files changed

+11
-134
lines changed

4 files changed

+11
-134
lines changed

CHANGELOG.rst

+6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
Changelog
22
*********
33

4+
0.38.0 (2024-06-16)
5+
-------------------
6+
7+
* Update :meth:`pyvcf.has_chr_prefix` method to ignore the HLA contigs for GRCh38.
8+
* :issue:`71`: Deprecate :meth:`common.plot_cytobands` method.
9+
410
0.37.0 (2023-09-09)
511
-------------------
612

fuc/api/common.py

-133
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import pandas as pd
2323
import numpy as np
2424
import matplotlib.pyplot as plt
25-
from matplotlib.collections import BrokenBarHCollection
2625
import matplotlib.patches as mpatches
2726
import seaborn as sns
2827

@@ -841,138 +840,6 @@ def extract_sequence(fasta, region):
841840
sequence = ''
842841
return sequence
843842

844-
def plot_cytobands(cytoband, bed, ax=None, figsize=None):
845-
"""
846-
Create chromosome ideograms along with BED data.
847-
848-
The method's source code is derived from a Python script (ideograms.py)
849-
written by Ryan Dale. The original script can be found at:
850-
https://gist.github.com/daler/c98fc410282d7570efc3#file-ideograms-py
851-
852-
Parameters
853-
----------
854-
cytoband : str
855-
Text file containing cytoband ideogram information.
856-
bed : str
857-
BED file to be displayed.
858-
ax : matplotlib.axes.Axes, optional
859-
Pre-existing axes for the plot. Otherwise, create a new one.
860-
figsize : tuple, optional
861-
Width, height in inches. Format: (float, float).
862-
863-
Examples
864-
--------
865-
866-
.. plot::
867-
:context: close-figs
868-
869-
>>> import matplotlib.pyplot as plt
870-
>>> from fuc import common
871-
>>> common.load_dataset('cytoband')
872-
>>> cytoband_file = '~/fuc-data/cytoband/cytoBandIdeo.txt.gz'
873-
>>> bed_file = '~/fuc-data/cytoband/ucsc_genes.bed.gz'
874-
>>> common.plot_cytobands(cytoband_file, bed_file, figsize=(10, 8))
875-
"""
876-
def chromosome_collections(df, y_positions, height, **kwargs):
877-
del_width = False
878-
if 'width' not in df.columns:
879-
del_width = True
880-
df['width'] = df['end'] - df['start']
881-
for chrom, group in df.groupby('chrom'):
882-
yrange = (y_positions[chrom], height)
883-
xranges = group[['start', 'width']].values
884-
yield BrokenBarHCollection(
885-
xranges, yrange, edgecolors=("black",), facecolors=group['colors'], **kwargs)
886-
if del_width:
887-
del df['width']
888-
889-
# Height of each ideogram
890-
chrom_height = 1
891-
892-
# Spacing between consecutive ideograms
893-
chrom_spacing = 1
894-
895-
# Height of the gene track. Should be smaller than `chrom_spacing` in order to
896-
# fit correctly
897-
gene_height = 0.4
898-
899-
# Padding between the top of a gene track and its corresponding ideogram
900-
gene_padding = 0.1
901-
902-
# Decide which chromosomes to use
903-
chromosome_list = [f'chr{i}' for i in list(range(1, 23)) + ['M', 'X', 'Y']]
904-
905-
# Keep track of the y positions for ideograms and genes for each chromosome,
906-
# and the center of each ideogram (which is where we'll put the ytick labels)
907-
ybase = 0
908-
chrom_ybase = {}
909-
gene_ybase = {}
910-
chrom_centers = {}
911-
912-
# Iterate in reverse so that items in the beginning of `chromosome_list` will
913-
# appear at the top of the plot
914-
for chrom in chromosome_list[::-1]:
915-
chrom_ybase[chrom] = ybase
916-
chrom_centers[chrom] = ybase + chrom_height / 2.
917-
gene_ybase[chrom] = ybase - gene_height - gene_padding
918-
ybase += chrom_height + chrom_spacing
919-
920-
# Read in ideogram.txt, downloaded from UCSC Table Browser
921-
ideo = pd.read_table(
922-
cytoband,
923-
names=['chrom', 'start', 'end', 'name', 'gieStain']
924-
)
925-
926-
# Filter out chromosomes not in our list
927-
ideo = ideo[ideo.chrom.apply(lambda x: x in chromosome_list)]
928-
929-
# Add a new column for width
930-
ideo['width'] = ideo.end - ideo.start
931-
932-
# Colors for different chromosome stains
933-
color_lookup = {
934-
'gneg': (1., 1., 1.),
935-
'gpos25': (.6, .6, .6),
936-
'gpos50': (.4, .4, .4),
937-
'gpos75': (.2, .2, .2),
938-
'gpos100': (0., 0., 0.),
939-
'acen': (.8, .4, .4),
940-
'gvar': (.8, .8, .8),
941-
'stalk': (.9, .9, .9),
942-
}
943-
944-
# Add a new column for colors
945-
ideo['colors'] = ideo['gieStain'].apply(lambda x: color_lookup[x])
946-
947-
# Same thing for genes
948-
genes = pd.read_table(
949-
bed,
950-
names=['chrom', 'start', 'end', 'name'],
951-
usecols=range(4))
952-
genes = genes[genes.chrom.apply(lambda x: x in chromosome_list)]
953-
genes['width'] = genes.end - genes.start
954-
genes['colors'] = '#2243a8'
955-
956-
if ax is None:
957-
fig, ax = plt.subplots(figsize=figsize)
958-
959-
# Now all we have to do is call our function for the ideogram data...
960-
for collection in chromosome_collections(ideo, chrom_ybase, chrom_height):
961-
ax.add_collection(collection)
962-
963-
# ...and the gene data
964-
for collection in chromosome_collections(
965-
genes, gene_ybase, gene_height, alpha=0.5, linewidths=0
966-
):
967-
ax.add_collection(collection)
968-
969-
# Axes tweaking
970-
ax.set_yticks([chrom_centers[i] for i in chromosome_list])
971-
ax.set_yticklabels(chromosome_list)
972-
ax.axis('tight')
973-
974-
return ax
975-
976843
def convert_file2list(fn):
977844
"""
978845
Convert a text file to a list of filenames.

fuc/api/pyvcf.py

+4
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,8 @@ def has_chr_prefix(file, size=1000):
763763
Return True if all of the sampled contigs from a VCF file have the
764764
(annoying) 'chr' string.
765765
766+
For GRCh38, the HLA contigs will be ignored.
767+
766768
Parameters
767769
----------
768770
file : str
@@ -779,6 +781,8 @@ def has_chr_prefix(file, size=1000):
779781
vcf = VariantFile(file)
780782
for record in vcf.fetch():
781783
n += 1
784+
if record.chrom.startswith('HLA'):
785+
continue
782786
if 'chr' not in record.chrom:
783787
return False
784788
if n > size:

fuc/version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '0.37.0'
1+
__version__ = '0.38.0'

0 commit comments

Comments
 (0)