|
22 | 22 | import pandas as pd
|
23 | 23 | import numpy as np
|
24 | 24 | import matplotlib.pyplot as plt
|
25 |
| -from matplotlib.collections import BrokenBarHCollection |
26 | 25 | import matplotlib.patches as mpatches
|
27 | 26 | import seaborn as sns
|
28 | 27 |
|
@@ -841,138 +840,6 @@ def extract_sequence(fasta, region):
|
841 | 840 | sequence = ''
|
842 | 841 | return sequence
|
843 | 842 |
|
844 |
| -def plot_cytobands(cytoband, bed, ax=None, figsize=None): |
845 |
| - """ |
846 |
| - Create chromosome ideograms along with BED data. |
847 |
| -
|
848 |
| - The method's source code is derived from a Python script (ideograms.py) |
849 |
| - written by Ryan Dale. The original script can be found at: |
850 |
| - https://gist.github.com/daler/c98fc410282d7570efc3#file-ideograms-py |
851 |
| -
|
852 |
| - Parameters |
853 |
| - ---------- |
854 |
| - cytoband : str |
855 |
| - Text file containing cytoband ideogram information. |
856 |
| - bed : str |
857 |
| - BED file to be displayed. |
858 |
| - ax : matplotlib.axes.Axes, optional |
859 |
| - Pre-existing axes for the plot. Otherwise, create a new one. |
860 |
| - figsize : tuple, optional |
861 |
| - Width, height in inches. Format: (float, float). |
862 |
| -
|
863 |
| - Examples |
864 |
| - -------- |
865 |
| -
|
866 |
| - .. plot:: |
867 |
| - :context: close-figs |
868 |
| -
|
869 |
| - >>> import matplotlib.pyplot as plt |
870 |
| - >>> from fuc import common |
871 |
| - >>> common.load_dataset('cytoband') |
872 |
| - >>> cytoband_file = '~/fuc-data/cytoband/cytoBandIdeo.txt.gz' |
873 |
| - >>> bed_file = '~/fuc-data/cytoband/ucsc_genes.bed.gz' |
874 |
| - >>> common.plot_cytobands(cytoband_file, bed_file, figsize=(10, 8)) |
875 |
| - """ |
876 |
| - def chromosome_collections(df, y_positions, height, **kwargs): |
877 |
| - del_width = False |
878 |
| - if 'width' not in df.columns: |
879 |
| - del_width = True |
880 |
| - df['width'] = df['end'] - df['start'] |
881 |
| - for chrom, group in df.groupby('chrom'): |
882 |
| - yrange = (y_positions[chrom], height) |
883 |
| - xranges = group[['start', 'width']].values |
884 |
| - yield BrokenBarHCollection( |
885 |
| - xranges, yrange, edgecolors=("black",), facecolors=group['colors'], **kwargs) |
886 |
| - if del_width: |
887 |
| - del df['width'] |
888 |
| - |
889 |
| - # Height of each ideogram |
890 |
| - chrom_height = 1 |
891 |
| - |
892 |
| - # Spacing between consecutive ideograms |
893 |
| - chrom_spacing = 1 |
894 |
| - |
895 |
| - # Height of the gene track. Should be smaller than `chrom_spacing` in order to |
896 |
| - # fit correctly |
897 |
| - gene_height = 0.4 |
898 |
| - |
899 |
| - # Padding between the top of a gene track and its corresponding ideogram |
900 |
| - gene_padding = 0.1 |
901 |
| - |
902 |
| - # Decide which chromosomes to use |
903 |
| - chromosome_list = [f'chr{i}' for i in list(range(1, 23)) + ['M', 'X', 'Y']] |
904 |
| - |
905 |
| - # Keep track of the y positions for ideograms and genes for each chromosome, |
906 |
| - # and the center of each ideogram (which is where we'll put the ytick labels) |
907 |
| - ybase = 0 |
908 |
| - chrom_ybase = {} |
909 |
| - gene_ybase = {} |
910 |
| - chrom_centers = {} |
911 |
| - |
912 |
| - # Iterate in reverse so that items in the beginning of `chromosome_list` will |
913 |
| - # appear at the top of the plot |
914 |
| - for chrom in chromosome_list[::-1]: |
915 |
| - chrom_ybase[chrom] = ybase |
916 |
| - chrom_centers[chrom] = ybase + chrom_height / 2. |
917 |
| - gene_ybase[chrom] = ybase - gene_height - gene_padding |
918 |
| - ybase += chrom_height + chrom_spacing |
919 |
| - |
920 |
| - # Read in ideogram.txt, downloaded from UCSC Table Browser |
921 |
| - ideo = pd.read_table( |
922 |
| - cytoband, |
923 |
| - names=['chrom', 'start', 'end', 'name', 'gieStain'] |
924 |
| - ) |
925 |
| - |
926 |
| - # Filter out chromosomes not in our list |
927 |
| - ideo = ideo[ideo.chrom.apply(lambda x: x in chromosome_list)] |
928 |
| - |
929 |
| - # Add a new column for width |
930 |
| - ideo['width'] = ideo.end - ideo.start |
931 |
| - |
932 |
| - # Colors for different chromosome stains |
933 |
| - color_lookup = { |
934 |
| - 'gneg': (1., 1., 1.), |
935 |
| - 'gpos25': (.6, .6, .6), |
936 |
| - 'gpos50': (.4, .4, .4), |
937 |
| - 'gpos75': (.2, .2, .2), |
938 |
| - 'gpos100': (0., 0., 0.), |
939 |
| - 'acen': (.8, .4, .4), |
940 |
| - 'gvar': (.8, .8, .8), |
941 |
| - 'stalk': (.9, .9, .9), |
942 |
| - } |
943 |
| - |
944 |
| - # Add a new column for colors |
945 |
| - ideo['colors'] = ideo['gieStain'].apply(lambda x: color_lookup[x]) |
946 |
| - |
947 |
| - # Same thing for genes |
948 |
| - genes = pd.read_table( |
949 |
| - bed, |
950 |
| - names=['chrom', 'start', 'end', 'name'], |
951 |
| - usecols=range(4)) |
952 |
| - genes = genes[genes.chrom.apply(lambda x: x in chromosome_list)] |
953 |
| - genes['width'] = genes.end - genes.start |
954 |
| - genes['colors'] = '#2243a8' |
955 |
| - |
956 |
| - if ax is None: |
957 |
| - fig, ax = plt.subplots(figsize=figsize) |
958 |
| - |
959 |
| - # Now all we have to do is call our function for the ideogram data... |
960 |
| - for collection in chromosome_collections(ideo, chrom_ybase, chrom_height): |
961 |
| - ax.add_collection(collection) |
962 |
| - |
963 |
| - # ...and the gene data |
964 |
| - for collection in chromosome_collections( |
965 |
| - genes, gene_ybase, gene_height, alpha=0.5, linewidths=0 |
966 |
| - ): |
967 |
| - ax.add_collection(collection) |
968 |
| - |
969 |
| - # Axes tweaking |
970 |
| - ax.set_yticks([chrom_centers[i] for i in chromosome_list]) |
971 |
| - ax.set_yticklabels(chromosome_list) |
972 |
| - ax.axis('tight') |
973 |
| - |
974 |
| - return ax |
975 |
| - |
976 | 843 | def convert_file2list(fn):
|
977 | 844 | """
|
978 | 845 | Convert a text file to a list of filenames.
|
|
0 commit comments