Skip to content

Commit 6675042

Browse files
committed
OK
Metadata can be attached at both ExpressionExperiment and BioAssay levels.
1 parent b2f9237 commit 6675042

File tree

17 files changed

+279
-14
lines changed

17 files changed

+279
-14
lines changed

gemma-cli/src/main/java/ubic/gemma/core/apps/RNASeqDataAddCli.java

+9
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@
2929
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
3030
import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService;
3131

32+
import java.io.File;
3233
import java.io.IOException;
3334
import java.util.Collection;
35+
import java.util.Collections;
3436

3537
/**
3638
* Designed to add count and/or RPKM data to a data set that has only meta-data.
@@ -50,6 +52,7 @@ public class RNASeqDataAddCli extends ExpressionExperimentManipulatingCLI {
5052
private Integer readLength = null;
5153
private String rpkmFile = null;
5254
private boolean justbackfillLog2cpm = false;
55+
private File[] additionalMetadata;
5356

5457
@Override
5558
public CommandGroup getCommandGroup() {
@@ -70,6 +73,10 @@ protected void buildOptions( Options options ) {
7073

7174
options.addOption( "log2cpm", "Just compute log2cpm from the existing stored count data (backfill); batchmode OK, no other options needed" );
7275

76+
options.addOption( Option.builder( "am" )
77+
.longOpt( "additional-metadata" )
78+
.type( File.class )
79+
.build() );
7380
}
7481

7582
@Override
@@ -192,6 +199,8 @@ protected void doWork() throws Exception {
192199
serv.addCountData( ee, targetArrayDesign, countMatrix, rpkmMatrix, readLength, isPairedReads,
193200
allowMissingSamples );
194201

202+
serv.addAdditionalMetadata( ee, additionalMetadata, Collections.emptyMap() );
203+
195204
} catch ( IOException e ) {
196205
throw new Exception( "Failed while processing " + ee, e );
197206
}

gemma-core/src/main/java/ubic/gemma/core/loader/expression/DataUpdater.java

+5
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
55
import ubic.gemma.model.common.quantitationtype.QuantitationType;
66
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
7+
import ubic.gemma.model.expression.bioAssay.BioAssay;
78
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
89

10+
import java.io.File;
911
import java.io.IOException;
12+
import java.util.Map;
1013

1114
public interface DataUpdater {
1215
void addAffyDataFromAPTOutput( ExpressionExperiment ee, String pathToAptOutputFile ) throws IOException;
@@ -26,4 +29,6 @@ void replaceData( ExpressionExperiment ee, ArrayDesign targetPlatform, Quantitat
2629

2730
ExpressionExperiment replaceData( ExpressionExperiment ee, ArrayDesign targetPlatform,
2831
ExpressionDataDoubleMatrix data );
32+
33+
void addAdditionalMetadata( ExpressionExperiment ee, File[] additionalMetadata, Map<BioAssay, File[]> additionalMetadataPerBioAssay );
2934
}

gemma-core/src/main/java/ubic/gemma/core/loader/expression/DataUpdaterImpl.java

+24-8
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.commons.logging.Log;
2222
import org.apache.commons.logging.LogFactory;
2323
import org.springframework.beans.factory.annotation.Autowired;
24+
import org.springframework.http.MediaType;
2425
import org.springframework.stereotype.Service;
2526
import org.springframework.transaction.annotation.Propagation;
2627
import org.springframework.transaction.annotation.Transactional;
@@ -49,6 +50,7 @@
4950
import ubic.gemma.model.expression.biomaterial.BioMaterial;
5051
import ubic.gemma.model.expression.designElement.CompositeSequence;
5152
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
53+
import ubic.gemma.model.expression.experiment.MetadataType;
5254
import ubic.gemma.persistence.service.analysis.expression.pca.PrincipalComponentAnalysisService;
5355
import ubic.gemma.persistence.service.analysis.expression.sampleCoexpression.SampleCoexpressionAnalysisService;
5456
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService;
@@ -61,6 +63,7 @@
6163
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
6264
import ubic.gemma.persistence.util.EntityUtils;
6365

66+
import java.io.File;
6467
import java.io.IOException;
6568
import java.util.*;
6669

@@ -189,9 +192,10 @@ public void addAffyDataFromAPTOutput( ExpressionExperiment ee, String pathToAptO
189192
* switched to use it.
190193
* @param countMatrix Representing 'raw' counts (added after rpkm, if provided).
191194
* @param rpkmMatrix Representing per-gene normalized data, optional (RPKM or FPKM)
192-
* @param allowMissingSamples if true, samples that are missing data will be deleted from the experiment.
193-
* @param isPairedReads is paired reads
194195
* @param readLength read length
196+
* @param isPairedReads is paired reads
197+
* @param allowMissingSamples if true, samples that are missing data will be deleted from the experiment.
198+
* @param additionalMetadata
195199
*/
196200
@Override
197201
@Transactional(propagation = Propagation.NEVER)
@@ -280,7 +284,6 @@ public void addCountData( ExpressionExperiment ee, ArrayDesign targetArrayDesign
280284

281285
this.addData( ee, targetArrayDesign, rpkmEEMatrix );
282286
}
283-
284287
}
285288

286289
/**
@@ -545,11 +548,11 @@ public void reprocessAffyDataFromCel( ExpressionExperiment ee ) {
545548
* selected experiment. Will do postprocessing if the data quantitationType is 'preferred', but if there is already
546549
* a preferred quantitation type, an error will be thrown.
547550
*
548-
* @param ee ee
549-
* @param targetPlatform optional; if null, uses the platform already used (if there is just one; you can't use
550-
* this
551-
* for a multi-platform dataset)
552-
* @param data to slot in
551+
* @param ee ee
552+
* @param targetPlatform optional; if null, uses the platform already used (if there is just one; you can't use
553+
* this
554+
* for a multi-platform dataset)
555+
* @param data to slot in
553556
* @return ee
554557
*/
555558
@Override
@@ -673,6 +676,19 @@ public ExpressionExperiment replaceData( ExpressionExperiment ee, ArrayDesign ta
673676
return ee;
674677
}
675678

679+
@Override
680+
@Transactional(propagation = Propagation.NEVER)
681+
public void addAdditionalMetadata( ExpressionExperiment ee, File[] additionalMetadata, Map<BioAssay, File[]> additionalMetadataPerBioAssay ) {
682+
for ( File am : additionalMetadata ) {
683+
experimentService.addAdditionalMetadata( ee, MetadataType.PREPROCESSING, am, MediaType.TEXT_PLAIN_VALUE );
684+
}
685+
for ( Map.Entry<BioAssay, File[]> e : additionalMetadataPerBioAssay.entrySet() ) {
686+
for ( File am : e.getValue() ) {
687+
experimentService.addAdditionalMetadata( ee, e.getKey(), MetadataType.PREPROCESSING, am, MediaType.TEXT_PLAIN_VALUE );
688+
}
689+
}
690+
}
691+
676692
/**
677693
* RNA-seq
678694
*
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package ubic.gemma.model.expression;
2+
3+
import lombok.Data;
4+
import lombok.EqualsAndHashCode;
5+
import ubic.gemma.model.common.Describable;
6+
import ubic.gemma.model.expression.bioAssay.BioAssay;
7+
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
8+
import ubic.gemma.model.expression.experiment.MetadataType;
9+
10+
import java.sql.Blob;
11+
12+
/**
13+
* Metadata associated to an {@link ExpressionExperiment} or {@link BioAssay}.
14+
* @author poirigui
15+
*/
16+
@Data
17+
@EqualsAndHashCode(of = { "id" })
18+
public class AdditionalMetadata implements Describable {
19+
20+
private Long id;
21+
private String name;
22+
private String description;
23+
private MetadataType type;
24+
private Blob contents;
25+
private String mediaType;
26+
}

gemma-core/src/main/java/ubic/gemma/model/expression/bioAssay/BioAssay.java

+13
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,13 @@
2323
import ubic.gemma.model.common.description.DatabaseEntry;
2424
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
2525
import ubic.gemma.model.expression.biomaterial.BioMaterial;
26+
import ubic.gemma.model.expression.AdditionalMetadata;
2627

2728
import javax.persistence.Transient;
2829
import java.io.Serializable;
2930
import java.util.Date;
31+
import java.util.HashSet;
32+
import java.util.Set;
3033

3134
/**
3235
* Represents the bringing together of a biomaterial with an assay of some sort (typically an expression assay). We
@@ -60,6 +63,8 @@ public class BioAssay extends AbstractDescribable implements gemma.gsec.model.Se
6063
*/
6164
private String fastqHeaders;
6265

66+
private Set<AdditionalMetadata> additionalMetadata = new HashSet<>();
67+
6368
@Override
6469
public int hashCode() {
6570
int hashCode;
@@ -215,6 +220,14 @@ public void setFastqHeaders( String fastqHeaders ) {
215220
this.fastqHeaders = fastqHeaders;
216221
}
217222

223+
public Set<AdditionalMetadata> getAdditionalMetadata() {
224+
return additionalMetadata;
225+
}
226+
227+
public void setAdditionalMetadata( Set<AdditionalMetadata> additionalMetadata ) {
228+
this.additionalMetadata = additionalMetadata;
229+
}
230+
218231
public static final class Factory {
219232

220233
public static BioAssay newInstance() {

gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java

+14-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
*/
1515
package ubic.gemma.model.expression.experiment;
1616

17-
import java.util.Collection;
1817
import java.util.HashSet;
1918
import java.util.Set;
2019

@@ -26,6 +25,7 @@
2625
import ubic.gemma.model.common.auditAndSecurity.curation.CurationDetails;
2726
import ubic.gemma.model.common.description.Characteristic;
2827
import ubic.gemma.model.common.quantitationtype.QuantitationType;
28+
import ubic.gemma.model.expression.AdditionalMetadata;
2929
import ubic.gemma.model.expression.bioAssay.BioAssay;
3030
import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation;
3131
import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector;
@@ -81,6 +81,11 @@ public void setNumberOfSamples( Integer numberofSamples ) {
8181

8282
private Set<Characteristic> allCharacteristics;
8383

84+
/**
85+
* A collection of additional metadata blobs.
86+
*/
87+
private Set<AdditionalMetadata> additionalMetadata = new HashSet<>();
88+
8489
@Override
8590
public ExpressionExperimentValueObject createValueObject() {
8691
return new ExpressionExperimentValueObject( this );
@@ -280,6 +285,14 @@ public void setTaxon( Taxon taxon ) {
280285
this.taxon = taxon;
281286
}
282287

288+
public Set<AdditionalMetadata> getAdditionalMetadata() {
289+
return additionalMetadata;
290+
}
291+
292+
public void setAdditionalMetadata( Set<AdditionalMetadata> additionalMetadata ) {
293+
this.additionalMetadata = additionalMetadata;
294+
}
295+
283296
@Override
284297
public String toString() {
285298
return super.toString() + ( shortName != null ? " Short Name=" + shortName : "" );
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package ubic.gemma.model.expression.experiment;
2+
3+
import ubic.gemma.model.expression.bioAssay.BioAssay;
4+
5+
/**
 * Kinds of additional metadata that can be attached to an {@link ExpressionExperiment} or a {@link BioAssay}.
 * <p>
 * New constants must be appended at the end so that the ordinals of existing constants do not change.
 */
public enum MetadataType {
    /**
     * A sequencing QC report.
     * <p>
     * Example: a FastQC report attached to a specific {@link BioAssay}.
     */
    SEQUENCING_QC_REPORT,
    /**
     * A sequencing alignment report.
     * <p>
     * Example: STAR's Log.final.out file on a {@link BioAssay}
     */
    SEQUENCING_ALIGNMENT_REPORT,
    /**
     * An overall sequencing report.
     * <p>
     * Example: a MultiQC report on a {@link ExpressionExperiment}
     */
    SEQUENCING_OVERALL_REPORT,
    /**
     * Metadata relating to data preprocessing.
     * <p>
     * This is the type the data updater assigns to the files it attaches to an {@link ExpressionExperiment} or a
     * {@link BioAssay}.
     * NOTE(review): the precise scope of "preprocessing" is not defined anywhere visible; confirm and document.
     */
    PREPROCESSING,
}

gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java

+16
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import ubic.gemma.model.common.description.Characteristic;
88
import ubic.gemma.model.common.description.DatabaseEntry;
99
import ubic.gemma.model.common.quantitationtype.QuantitationType;
10+
import ubic.gemma.model.expression.AdditionalMetadata;
1011
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
1112
import ubic.gemma.model.expression.bioAssay.BioAssay;
1213
import ubic.gemma.model.expression.bioAssayData.BioAssayDimension;
@@ -18,11 +19,13 @@
1819
import ubic.gemma.persistence.service.BrowsingDao;
1920
import ubic.gemma.persistence.service.FilteringVoEnabledDao;
2021
import ubic.gemma.persistence.service.common.auditAndSecurity.curation.CuratableDao;
22+
import ubic.gemma.persistence.service.expression.bioAssay.BioAssayDao;
2123
import ubic.gemma.persistence.util.Filters;
2224
import ubic.gemma.persistence.util.Slice;
2325
import ubic.gemma.persistence.util.Sort;
2426

2527
import javax.annotation.Nullable;
28+
import java.io.InputStream;
2629
import java.util.Collection;
2730
import java.util.Date;
2831
import java.util.List;
@@ -232,4 +235,17 @@ Map<ExpressionExperiment, Collection<AuditEvent>> getSampleRemovalEvents(
232235
long countTroubledPlatforms( ExpressionExperiment ee );
233236

234237
MeanVarianceRelation updateMeanVarianceRelation( ExpressionExperiment ee, MeanVarianceRelation mvr );
238+
239+
/**
240+
* Add metadata on a given dataset.
241+
*/
242+
AdditionalMetadata addAdditionalMetadata( ExpressionExperiment ee, MetadataType type, InputStream additionalMetadata, long length, String mediaType );
243+
244+
/**
245+
* Add metadata on a specific bioassay.
246+
* <p>
247+
* FIXME: this should probably be relocated in {@link BioAssayDao}.
248+
* @throws IllegalArgumentException if the bioassay does not belong to the expression experiment
249+
*/
250+
AdditionalMetadata addAdditionalMetadata( ExpressionExperiment ee, BioAssay sample, MetadataType metadataType, InputStream stream, long length, String mediaType ) throws IllegalArgumentException;
235251
}

gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java

+30-2
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import ubic.gemma.model.common.description.Characteristic;
4242
import ubic.gemma.model.common.description.DatabaseEntry;
4343
import ubic.gemma.model.common.quantitationtype.QuantitationType;
44+
import ubic.gemma.model.expression.AdditionalMetadata;
4445
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
4546
import ubic.gemma.model.expression.arrayDesign.ArrayDesignValueObject;
4647
import ubic.gemma.model.expression.bioAssay.BioAssay;
@@ -60,8 +61,8 @@
6061
import ubic.gemma.persistence.util.*;
6162

6263
import javax.annotation.Nullable;
64+
import java.io.InputStream;
6365
import java.util.*;
64-
import java.util.regex.Pattern;
6566
import java.util.stream.Collectors;
6667

6768
import static java.util.stream.Collectors.groupingBy;
@@ -430,7 +431,7 @@ public Collection<ExpressionExperiment> findByTaxon( Taxon taxon ) {
430431
//language=HQL
431432
// final String queryString =
432433
// "select distinct ee from ExpressionExperiment as ee " + "inner join ee.bioAssays as ba "
433-
// + "inner join ba.sampleUsed as sample where sample.sourceTaxon = :taxon ";
434+
// + "inner join ba.sampleUsed as bioAssay where bioAssay.sourceTaxon = :taxon ";
434435
final String queryString = "select ee from ExpressionExperiment as ee where ee.taxon = (:taxon)";
435436

436437
//noinspection unchecked
@@ -701,6 +702,32 @@ public MeanVarianceRelation updateMeanVarianceRelation( ExpressionExperiment ee,
701702
return mvr;
702703
}
703704

705+
@Override
706+
public AdditionalMetadata addAdditionalMetadata( ExpressionExperiment ee, MetadataType type, InputStream stream, long length, String mediaType ) {
707+
AdditionalMetadata am = createAdditionalMetadata( type, stream, length );
708+
ee.getAdditionalMetadata().add( am );
709+
return am;
710+
}
711+
712+
@Override
713+
public AdditionalMetadata addAdditionalMetadata( ExpressionExperiment ee, BioAssay bioAssay, MetadataType type, InputStream stream, long length, String mediaType ) throws IllegalArgumentException {
714+
if ( ee.getBioAssays().contains( bioAssay ) ) {
715+
throw new IllegalArgumentException( String.format( "%s is not part of %s", bioAssay, ee ) );
716+
}
717+
AdditionalMetadata am = createAdditionalMetadata( type, stream, length );
718+
bioAssay.getAdditionalMetadata().add( am );
719+
return am;
720+
}
721+
722+
private AdditionalMetadata createAdditionalMetadata( MetadataType type, InputStream stream, long length ) {
723+
AdditionalMetadata meta = new AdditionalMetadata();
724+
meta.setType( type );
725+
meta.setContents( getSessionFactory().getCurrentSession().getLobHelper().createBlob( stream, length ) );
726+
meta.setMediaType( "text/plain" );
727+
getSessionFactory().getCurrentSession().persist( meta );
728+
return meta;
729+
}
730+
704731
@Override
705732
public Collection<ArrayDesign> getArrayDesignsUsed( BioAssaySet bas ) {
706733

@@ -1701,6 +1728,7 @@ protected void configureFilterableProperties( FilterablePropertiesConfigurer con
17011728
configurer.unregisterProperty( "source" );
17021729
configurer.unregisterProperty( "otherParts.size" );
17031730
configurer.unregisterProperty( "otherRelevantPublications.size" );
1731+
configurer.unregisterProperty( "additionalMetadata.size" );
17041732

17051733
configurer.unregisterProperties( p -> p.endsWith( "externalDatabases.size" ) );
17061734

0 commit comments

Comments
 (0)