---
title: "Handling single-cell RNA-seq data in GEOquery"
author: "Sean Davis"
date: "2025-08-29"
last-modified: {{< meta last-modified >}}
format:
  html:
    toc: true
execute:
  eval: false
vignette: >
  %\VignetteIndexEntry{Handling single-cell RNA-seq data in GEOquery}
  %\VignetteEngine{quarto::html}
  %\VignetteEncoding{UTF-8}
---

```{r message=FALSE, warning=FALSE, eval=TRUE}
library(GEOquery)
library(SingleCellExperiment)
library(DropletUtils)
```

## Single cell searching

```{r}
# Run a GEO search with a query aimed at single-cell matrix (mtx) data.
res <- searchGEO("single cell mtx matrix")

# Preview the first few rows of the result.
head(res)
```


## Mtx files

```{r}
# Ask for the supplementary files of the series with fetch_files = FALSE,
# i.e. request the listing rather than fetching the files.
poss <- getGEOSuppFiles("GSE248214", fetch_files = FALSE)

# Print the listing.
poss
```


## Multiple h5ad files

```{r}
library(GEOquery)

# Same listing call as for the mtx series, this time for a series used here
# as the "multiple h5ad files" example (fetch_files = FALSE).
s <- getGEOSuppFiles("GSE161228", fetch_files = FALSE)

# Print the listing.
s
```

## Single h5ad file


## SAMPLES

### mtx file

```{r}
# Retrieve the supplementary files attached to a single sample (GSM) record.
# The result is kept in `s` because the next chunk reads the file paths from it.
s <- getGEOSuppFiles("GSM7908437")

# Show what was returned.
s
```
```{r}
# read10xCounts(type = "prefix") expects the part of the path that precedes
# "barcodes.tsv.gz" / "matrix.mtx.gz" / ..., so recover it from the barcodes
# file. The pattern is escaped and anchored: an unescaped "." would match any
# character, and without "$" a match elsewhere in the path would be removed.
barcodes_pattern <- "barcodes\\.tsv\\.gz$"
barcodes_files <- grep(barcodes_pattern, s$filepath, value = TRUE)

# Fail early with a clear message instead of passing an empty prefix vector on.
if (length(barcodes_files) == 0) {
  stop("No file ending in 'barcodes.tsv.gz' found in `s$filepath`.",
       call. = FALSE)
}

sample_prefix <- sub(barcodes_pattern, "", barcodes_files)

gsm_sce <- DropletUtils::read10xCounts(
  samples = sample_prefix,
  type = "prefix"
)
gsm_sce

# Per-cell annotation of the resulting object.
head(colData(gsm_sce))
```

### Tar of mtx files from Series record

```{r}
# Retrieve the supplementary files of the whole series (GSE) record.
# `s` is reused by the following chunk to locate the downloaded archive.
s <- getGEOSuppFiles("GSE248214")

# Show what was returned.
s
```
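From here, untar the downloaded archive and strip the `matrix.mtx.gz` suffix from each extracted matrix file to obtain the per-sample file prefixes.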

```{r}
# The first row name of `s` is taken as the path of the downloaded tar archive.
# NOTE(review): other chunks read paths from `s$filepath`; confirm which
# accessor matches the installed GEOquery version.
tar_filename <- rownames(s)[1]

# Extract into a dedicated sub-directory of the session temp dir so that only
# files coming from this archive are scanned below (untar() creates it).
exdir <- file.path(tempdir(), "GSE248214")
untar(tar_filename, exdir = exdir)

# `pattern` is a regular expression, not a shell glob, so a leading "*" is not
# a wildcard. Escape the dots and anchor at the end of the file name; the same
# pattern is then used to strip the suffix and leave the per-sample prefix.
matrix_pattern <- "matrix\\.mtx\\.gz$"
matrix_files <- dir(exdir, pattern = matrix_pattern, full.names = TRUE)
sample_prefixes <- sub(matrix_pattern, "", matrix_files)

# One prefix per sample; all of them are passed to a single read10xCounts()
# call.
gse_sce <- DropletUtils::read10xCounts(
  samples = sample_prefixes,
  type = "prefix"
)
gse_sce
```

### Mix of types in a single GSE

```{r}
# Print the file listing of a series used here as the mixed-file-type example.
getGEOSeriesFileListing("GSE288770")
```

### 10x h5 file

```{r}
# Retrieve the supplementary file(s) of the sample.
s <- getGEOSuppFiles("GSM8775062")

# Hand the downloaded path(s) straight to read10xCounts(); no `type` is given,
# so the function's default handling of the input format applies.
sce <- DropletUtils::read10xCounts(s$filepath)

sce
```

### 10x matrix mtx files

```{r}
s <- getGEOSuppFiles("GSM8775066")

# Derive the sample prefix from the downloaded files instead of hard-coding an
# absolute path that only exists on one machine.
# NOTE(review): assumes `s$filepath` holds the downloaded paths (as in the
# other sample chunks) and that the sample ships a "<prefix>matrix.mtx[.gz]"
# file; if several matrix files are present, all of them are loaded.
matrix_pattern <- "matrix\\.mtx(\\.gz)?$"
matrix_files <- grep(matrix_pattern, s$filepath, value = TRUE)

# Fail early with a clear message instead of passing an empty prefix vector on.
if (length(matrix_files) == 0) {
  stop("No file ending in 'matrix.mtx' or 'matrix.mtx.gz' found in ",
       "`s$filepath`.", call. = FALSE)
}

sample_prefix <- sub(matrix_pattern, "", matrix_files)

sce2 <- DropletUtils::read10xCounts(
  samples = sample_prefix,
  type = "prefix"
)
sce2
```
