Source code for bioden.exporter

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#  Copyright 2010, 2011, 2015 GiMaRIS <info@gimaris.com>
#
#  This file is part of BioDen - A data normalizer and transposer for
#  files containing taxon biomass/density data for ecotopes.
#
#  BioDen is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  BioDen is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.

import os
import csv
from sqlite3 import dbapi2 as sqlite

import xlwt

class Generator:
    """Base class for the exporter classes."""

    def __init__(self, processor):
        self.processor = processor
        self._dbfile = processor._dbfile
        self._property = processor._property
        self._representative_groups = processor._representative_groups
        self._do_round = processor._do_round
        self._output_folder = processor._output_folder
        self._file_extension = ".txt"
        self.taxa = processor.taxa
        self.ecotopes = processor.ecotopes

    def ecotope_data_grouped(self, ecotope, data_type='raw'):
        """Return an iterator object which generates the CSV data of the
        grouped data for ecotope `ecotope`.
        """
        if data_type == 'raw':
            target_table = 'sums_of'
        elif data_type == 'normalized':
            target_table = 'normalized_sums_of'
        else:
            raise ValueError("Value for 'data_type' can be either 'raw' or 'normalized', not '%s'." % data_type)

        # Connect to the database.
        connection = sqlite.connect(self._dbfile)
        cursor = connection.cursor()

        # Return the first row containing the property.
        yield ['Property:', self._property]

        # Return the second row containing the ecotope name.
        yield ['Ecotope:', ecotope]

        # Get the group IDs for this ecotope.
        cursor.execute("SELECT group_id \
            FROM %s \
            WHERE compiled_ecotope = ?" % target_table,
            (ecotope,))
        group_ids = []
        for id in cursor:
            if id[0] not in group_ids:
                group_ids.append(id[0])
        group_ids.sort()

        # Return the third row containing the group numbers.
        row = ['Sample group:']
        row.extend(group_ids)
        yield row

        # Return the fourth row containing the group surfaces.
        row = ['Group surface:']
        for group_id in group_ids:
            cursor.execute("SELECT group_surface \
                FROM %s \
                WHERE compiled_ecotope = ? \
                AND group_id = ?" % target_table,
                (ecotope, group_id))
            row.extend(cursor.fetchone())
        yield row

        # Return an empty row.
        yield [None]

        # Return the data rows.
        for taxon in self.taxa:
            row = [taxon]
            cursor.execute("SELECT group_id, sum_of \
                FROM %s \
                WHERE compiled_ecotope = ? \
                AND standardised_taxon = ?" % target_table,
                (ecotope, taxon))
            sums_of = dict(cursor)
            for group_id in group_ids:
                if group_id in sums_of:
                    sum_of = sums_of[group_id]
                    if isinstance(self._do_round, int):
                        sum_of = round(sum_of, self._do_round)
                    row.append(sum_of)
                else:
                    row.append(None)
            yield row

        # Close the connection with the local database.
        cursor.close()
        connection.close()

    def ecotope_data_raw(self, ecotope):
        """Return an iterator object which generates the CSV data of the
        non-grouped data for ecotope `ecotope`.
        """
        if self._property == 'biomass':
            select_field = 'sum_of_biomass'
        else:
            select_field = 'sum_of_density'

        # Connect to the database.
        connection = sqlite.connect(self._dbfile)
        cursor = connection.cursor()

        # Return the first row containing the property.
        yield ['Property:', self._property]

        # Return the second row containing the ecotope name.
        yield ['Ecotope:', ecotope]

        # Get the sample codes for this ecotope.
        cursor.execute("SELECT sample_code \
            FROM data \
            WHERE compiled_ecotope = ?",
            (ecotope,))
        sample_codes = []
        for id in cursor:
            if id[0] not in sample_codes:
                sample_codes.append(id[0])
        sample_codes.sort()

        # Return the third row containing the sample codes.
        row = ['Sample code:']
        row.extend(sample_codes)
        yield row

        # Return the fourth row containing the sample surfaces.
        row = ['Sample surface:']
        for sample_code in sample_codes:
            cursor.execute("SELECT sample_surface \
                FROM samples \
                WHERE sample_code = ?",
                (sample_code,))
            sample_surface = cursor.fetchone()[0]
            row.append(sample_surface)
        yield row

        # Return an empty row.
        yield [None]

        # Return the data rows.
        for taxon in self.taxa:
            row = [taxon]
            cursor.execute("SELECT sample_code, %s \
                FROM data \
                WHERE compiled_ecotope = ? \
                AND standardised_taxon = ?" % select_field,
                (ecotope, taxon))
            sums_of = dict(cursor)
            for sample_code in sample_codes:
                if sample_code in sums_of:
                    sum_of = sums_of[sample_code]
                    if isinstance(self._do_round, int):
                        sum_of = round(sum_of, self._do_round)
                    row.append(sum_of)
                else:
                    row.append(None)
            yield row

        # Close the connection with the local database.
        cursor.close()
        connection.close()

    def representatives(self):
        """Return an iterator object which generates the CSV data with
        only the representative group for each ecotope.
        """
        # Connect to the database.
        connection = sqlite.connect(self._dbfile)
        cursor = connection.cursor()

        # Return the first row containing the property.
        yield ['Property:', self._property]

        # Return the second row containing the ecotopes.
        row = ['Ecotope:']
        for ecotope in self.ecotopes:
            row.append(ecotope)
        yield row

        # Return the third row containing the group surfaces.
        row = ['Group surface:']
        for ecotope in self.ecotopes:
            # Append an empty field if the ecotope has no group.
            if ecotope not in self._representative_groups:
                row.append(None)
                continue

            # Get the most representative group for this ecotope.
            group_id = self._representative_groups[ecotope]

            # Get the group surface for this group.
            cursor.execute("SELECT group_surface \
                FROM normalized_sums_of \
                WHERE compiled_ecotope = ? \
                AND group_id = ?",
                (ecotope, group_id))
            row.extend(cursor.fetchone())
        yield row

        # Return an empty row.
        yield [None]

        # Return the data rows.
        for taxon in self.taxa:
            row = [taxon]
            for ecotope in self.ecotopes:
                # Append an empty field if the ecotope has no group.
                if ecotope not in self._representative_groups:
                    row.append(None)
                    continue

                # Get the most representative group for this ecotope.
                group_id = self._representative_groups[ecotope]

                # Get the sum for this taxon in this group.
                cursor.execute("SELECT sum_of \
                    FROM normalized_sums_of \
                    WHERE group_id = ? \
                    AND compiled_ecotope = ? \
                    AND standardised_taxon = ?",
                    (group_id, ecotope, taxon))
                density = cursor.fetchone()
                if not density:
                    row.append(None)
                else:
                    density = density[0]
                    if isinstance(self._do_round, int):
                        density = round(density, self._do_round)
                    row.append(density)
            yield row

        # Close the connection with the local database.
        cursor.close()
        connection.close()

    def export_ecotopes_grouped(self, data_type='raw'):
        """Export the grouped data to a separate file for each ecotope.

        If `data_type` is set to "raw", the non-normalized group values
        are exported. If `data_type` is set to "normalized", the
        normalized group values are exported.
        """
        for ecotope in self.ecotopes:
            # Update the progress dialog.
            self.processor.pdialog_handler.increase()

            # Create a CSV generator.
            data = self.ecotope_data_grouped(ecotope, data_type)

            # Construct a filename.
            if data_type == 'raw':
                prefix = 'grouped'
            elif data_type == 'normalized':
                prefix = 'ambi'
            else:
                raise ValueError("Value for 'data_type' can be either 'raw' or 'normalized', not '%s'." % data_type)
            suffix = ecotope.replace(" ", "_")
            filename = "%s_%s_%s%s" % (prefix, self._property, suffix, self._file_extension)
            output_file = os.path.join(self._output_folder, filename)

            # Export the data.
            self.processor.pdialog_handler.add_details("Saving %s sample groups of ecotope '%s' to %s" % (data_type, ecotope, output_file))
            self.export(output_file, data)

    def export_ecotopes_raw(self):
        """Export the non-grouped (raw) data to a separate file for each
        ecotope.
        """
        for ecotope in self.ecotopes:
            # Update the progress dialog.
            self.processor.pdialog_handler.increase()

            # Create a CSV generator.
            data = self.ecotope_data_raw(ecotope)

            # Construct a filename.
            suffix = ecotope.replace(" ", "_")
            filename = "raw_%s_%s%s" % (self._property, suffix, self._file_extension)
            output_file = os.path.join(self._output_folder, filename)

            # Export the data.
            self.processor.pdialog_handler.add_details("Saving raw data of ecotope '%s' to %s" % (ecotope, output_file))
            self.export(output_file, data)

    def export_representatives(self):
        """Export the representative group for each ecotope to a single
        file.
        """
        # Create a CSV generator.
        data = self.representatives()

        # Construct a filename.
        filename = "representatives_%s%s" % (self._property, self._file_extension)
        output_file = os.path.join(self._output_folder, filename)

        # Export the data.
        self.processor.pdialog_handler.add_details("Saving representative sample groups to %s" % output_file)
        self.export(output_file, data)

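# The following sketch is not part of the original module. Generator reads a
# number of attributes directly from the `processor` object it is given; the
# class below is a hypothetical stand-in that only illustrates that expected
# interface. The class name and all attribute values are assumptions.
class _ExampleProcessor(object):
    """Illustrative only: the minimal interface Generator expects."""

    def __init__(self):
        self._dbfile = "bioden.sqlite"      # path to the SQLite database
        self._property = "density"          # or "biomass"
        self._representative_groups = {}    # maps ecotope -> representative group_id
        self._do_round = 2                  # decimals to round to; a non-integer disables rounding
        self._output_folder = "."           # folder where export files are written
        self.taxa = []                      # standardised taxon names
        self.ecotopes = []                  # compiled ecotope names
        self.pdialog_handler = None         # progress dialog handler (normally set by the GUI)
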
class CSVExporter(Generator):
    """Export data in CSV format."""

    def __init__(self, processor):
        Generator.__init__(self, processor)
        self._file_extension = ".csv"

    def export(self, output_file, data):
        """Write the CSV data `data` to the file `output_file`.

        For better performance, `data` should be an iterator object.
        """
        writer = csv.writer(open(output_file, 'wb'),
            delimiter=',',
            quoting=csv.QUOTE_MINIMAL)
        writer.writerows(data)

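# The following sketch is not part of the original module. It is a
# hypothetical driver showing how CSVExporter might typically be used; the
# function name is an assumption, and `processor` is assumed to provide the
# attributes read in Generator.__init__ (see the stand-in sketched above).
def _example_csv_export(processor):
    """Illustrative only: write every CSV export file for `processor`."""
    exporter = CSVExporter(processor)
    exporter.export_ecotopes_raw()
    exporter.export_ecotopes_grouped(data_type='raw')
    exporter.export_ecotopes_grouped(data_type='normalized')
    exporter.export_representatives()
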
class XLSExporter(Generator):
    """Export data in XLS format."""

    def __init__(self, processor):
        Generator.__init__(self, processor)
        self._file_extension = ".xls"

    def export(self, output_file, data):
        """Write the data `data` to the XLS file `output_file`.

        For better performance, `data` should be an iterator object.
        """
        wb = xlwt.Workbook()
        ws = wb.add_sheet('data')
        result = self.write_rows(ws, data)
        wb.save(output_file)
        #if not result:
        #    print "Reached maximum of 256 columns for %s." % output_file

    def write_rows(self, work_sheet, data):
        """Write the rows of `data` to the XLS worksheet `work_sheet`.

        Return False if the 256 column limit of the XLS format was
        reached, True otherwise.
        """
        out = True
        for r, row in enumerate(data):
            for c, value in enumerate(row):
                if c >= 256:
                    # Stop writing this row; the XLS format supports at
                    # most 256 columns.
                    out = False
                    break
                work_sheet.write(r, c, value)
        return out
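
# The following sketch is not part of the original module. It shows that the
# row generators can also be consumed directly, independent of export(); the
# function name and the choice of XLSExporter here are assumptions.
def _example_print_representatives(processor):
    """Illustrative only: print the representatives table row by row."""
    exporter = XLSExporter(processor)
    for row in exporter.representatives():
        print(row)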