aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author kabeer27 <32016558+kabeer27@users.noreply.github.com> 2020-07-13 04:21:56 +0000
committer GitHub <noreply@github.com> 2020-07-13 14:21:56 +1000
commit 9413d10e086ecb776c511de80a1531fae2b5dd8f (patch)
tree 5b7c2137eaed57cb57cb0aa8efbfb37885c781a6
parent 83451151b44d25a1c89e2ff31a991a8f4083f446 (diff)
[Sync cloud function] Cache file contents (#4104)
* Storing contents of project.yaml and Dockerfile, to reduce API calls
* Fixing comment indentation
* Requested changes
* Minor changes
* Importing ProjectMetadata

Co-authored-by: Kabeer Seth <kabeerseth@google.com>
-rw-r--r-- infra/build/functions/sync/main.py 80
-rw-r--r-- infra/build/functions/sync/main_test.py 63
2 files changed, 93 insertions, 50 deletions
diff --git a/infra/build/functions/sync/main.py b/infra/build/functions/sync/main.py
index 3fea68b0..5524bea4 100644
--- a/infra/build/functions/sync/main.py
+++ b/infra/build/functions/sync/main.py
@@ -30,7 +30,8 @@ VALID_PROJECT_NAME = re.compile(r'^[a-zA-Z0-9_-]+$')
DEFAULT_BUILDS_PER_DAY = 1
MAX_BUILDS_PER_DAY = 4
-ProjectMetadata = namedtuple('ProjectMetadata', 'schedule')
+ProjectMetadata = namedtuple(
+ 'ProjectMetadata', 'schedule project_yaml_contents dockerfile_contents')
class ProjectYamlError(Exception):
@@ -42,6 +43,8 @@ class Project(ndb.Model):
"""Represents an integrated OSS-Fuzz project."""
name = ndb.StringProperty()
schedule = ndb.StringProperty()
+ project_yaml_contents = ndb.TextProperty()
+ dockerfile_contents = ndb.TextProperty()
# pylint: disable=too-few-public-methods
@@ -94,6 +97,7 @@ def update_scheduler(cloud_scheduler_client, project, schedule):
cloud_scheduler_client.update(job, update_mask)
+# pylint: disable=too-many-branches
def sync_projects(cloud_scheduler_client, projects):
"""Sync projects with cloud datastore."""
for project in Project.query():
@@ -115,23 +119,39 @@ def sync_projects(cloud_scheduler_client, projects):
try:
create_scheduler(cloud_scheduler_client, project_name,
projects[project_name].schedule)
- Project(name=project_name, schedule=projects[project_name].schedule).put()
+ project_metadata = projects[project_name]
+ Project(name=project_name,
+ schedule=project_metadata.schedule,
+ project_yaml_contents=project_metadata.project_yaml_contents,
+ dockerfile_contents=project_metadata.dockerfile_contents).put()
except exceptions.GoogleAPICallError as error:
logging.error('Scheduler creation for %s failed with %s', project_name,
error)
for project in Project.query():
- if project.name not in projects or project.schedule == projects[
- project.name]:
+ if project.name not in projects:
continue
- try:
- update_scheduler(cloud_scheduler_client, project,
- projects[project.name].schedule)
- project.schedule = projects[project.name].schedule
+ project_metadata = projects[project.name]
+ project_changed = False
+ if project.schedule != project_metadata.schedule:
+ try:
+ update_scheduler(cloud_scheduler_client, project,
+ projects[project.name].schedule)
+ project.schedule = project_metadata.schedule
+ project_changed = True
+ except exceptions.GoogleAPICallError as error:
+ logging.error('Updating scheduler for %s failed with %s', project.name,
+ error)
+ if project.project_yaml_contents != project_metadata.project_yaml_contents:
+ project.project_yaml_contents = project_metadata.project_yaml_contents
+ project_changed = True
+
+ if project.dockerfile_contents != project_metadata.dockerfile_contents:
+ project.dockerfile_contents = project_metadata.dockerfile_contents
+ project_changed = True
+
+ if project_changed:
project.put()
- except exceptions.GoogleAPICallError as error:
- logging.error('Updating scheduler for %s failed with %s', project.name,
- error)
def _has_docker_file(project_contents):
@@ -140,27 +160,30 @@ def _has_docker_file(project_contents):
content_file.name == 'Dockerfile' for content_file in project_contents)
-def get_schedule(project_contents):
+def get_project_metadata(project_contents):
"""Checks for schedule parameter in yaml file else uses DEFAULT_SCHEDULE."""
for content_file in project_contents:
- if content_file.name != 'project.yaml':
- continue
- project_yaml = yaml.safe_load(content_file.decoded_content.decode('utf-8'))
- builds_per_day = project_yaml.get('builds_per_day', DEFAULT_BUILDS_PER_DAY)
- if not isinstance(builds_per_day, int) or builds_per_day not in range(
- 1, MAX_BUILDS_PER_DAY + 1):
- raise ProjectYamlError('Parameter is not an integer in range [1-4]')
+ if content_file.name == 'project.yaml':
+ project_yaml_contents = content_file.decoded_content.decode('utf-8')
+
+ if content_file.name == 'Dockerfile':
+ dockerfile_contents = content_file.decoded_content.decode('utf-8')
- # Starting at 6:00 am, next build schedules are added at 'interval' slots
- # Example for interval 2, hours = [6, 18] and schedule = '0 6,18 * * *'
+ project_yaml = yaml.safe_load(project_yaml_contents)
+ builds_per_day = project_yaml.get('builds_per_day', DEFAULT_BUILDS_PER_DAY)
+ if not isinstance(builds_per_day, int) or builds_per_day not in range(
+ 1, MAX_BUILDS_PER_DAY + 1):
+ raise ProjectYamlError('Parameter is not an integer in range [1-4]')
- interval = 24 // builds_per_day
- hours = []
- for hour in range(6, 30, interval):
- hours.append(hour % 24)
- schedule = '0 ' + ','.join(str(hour) for hour in hours) + ' * * *'
+ # Starting at 6:00 am, next build schedules are added at 'interval' slots
+ # Example for interval 2, hours = [6, 18] and schedule = '0 6,18 * * *'
+ interval = 24 // builds_per_day
+ hours = []
+ for hour in range(6, 30, interval):
+ hours.append(hour % 24)
+ schedule = '0 ' + ','.join(str(hour) for hour in hours) + ' * * *'
- return schedule
+ return ProjectMetadata(schedule, project_yaml_contents, dockerfile_contents)
def get_projects(repo):
@@ -177,8 +200,7 @@ def get_projects(repo):
continue
try:
- projects[content_file.name] = ProjectMetadata(
- schedule=get_schedule(project_contents))
+ projects[content_file.name] = get_project_metadata(project_contents)
except ProjectYamlError as error:
logging.error(
'Incorrect format for project.yaml file of %s with error %s',
diff --git a/infra/build/functions/sync/main_test.py b/infra/build/functions/sync/main_test.py
index b52a8a0c..315f0e90 100644
--- a/infra/build/functions/sync/main_test.py
+++ b/infra/build/functions/sync/main_test.py
@@ -16,7 +16,6 @@
"""Unit tests for Cloud Function sync, which syncs the list of github projects
and uploads them to the Cloud Datastore."""
-from collections import namedtuple
import os
import subprocess
import threading
@@ -30,12 +29,12 @@ from main import get_access_token
from main import get_projects
from main import sync_projects
from main import Project
+from main import ProjectMetadata
_EMULATOR_TIMEOUT = 20
_DATASTORE_READY_INDICATOR = b'is now running'
_DATASTORE_EMULATOR_PORT = 8432
_TEST_PROJECT_ID = 'test-project'
-ProjectMetadata = namedtuple('ProjectMetadata', 'schedule')
def start_datastore_emulator():
@@ -173,12 +172,18 @@ class TestDataSync(unittest.TestCase):
cloud_scheduler_client = CloudSchedulerClient()
with client.context():
- Project(name='test1', schedule='0 8 * * *').put()
- Project(name='test2', schedule='0 9 * * *').put()
+ Project(name='test1',
+ schedule='0 8 * * *',
+ project_yaml_contents='',
+ dockerfile_contents='').put()
+ Project(name='test2',
+ schedule='0 9 * * *',
+ project_yaml_contents='',
+ dockerfile_contents='').put()
projects = {
- 'test1': ProjectMetadata('0 8 * * *'),
- 'test2': ProjectMetadata('0 7 * * *')
+ 'test1': ProjectMetadata('0 8 * * *', '', ''),
+ 'test2': ProjectMetadata('0 7 * * *', '', '')
}
sync_projects(cloud_scheduler_client, projects)
@@ -194,11 +199,14 @@ class TestDataSync(unittest.TestCase):
cloud_scheduler_client = CloudSchedulerClient()
with client.context():
- Project(name='test1', schedule='0 8 * * *').put()
+ Project(name='test1',
+ schedule='0 8 * * *',
+ project_yaml_contents='',
+ dockerfile_contents='').put()
projects = {
- 'test1': ProjectMetadata('0 8 * * *'),
- 'test2': ProjectMetadata('0 7 * * *')
+ 'test1': ProjectMetadata('0 8 * * *', '', ''),
+ 'test2': ProjectMetadata('0 7 * * *', '', '')
}
sync_projects(cloud_scheduler_client, projects)
@@ -214,10 +222,16 @@ class TestDataSync(unittest.TestCase):
cloud_scheduler_client = CloudSchedulerClient()
with client.context():
- Project(name='test1', schedule='0 8 * * *').put()
- Project(name='test2', schedule='0 9 * * *').put()
-
- projects = {'test1': ProjectMetadata('0 8 * * *')}
+ Project(name='test1',
+ schedule='0 8 * * *',
+ project_yaml_contents='',
+ dockerfile_contents='').put()
+ Project(name='test2',
+ schedule='0 9 * * *',
+ project_yaml_contents='',
+ dockerfile_contents='').put()
+
+ projects = {'test1': ProjectMetadata('0 8 * * *', '', '')}
sync_projects(cloud_scheduler_client, projects)
projects_query = Project.query()
@@ -243,8 +257,12 @@ class TestDataSync(unittest.TestCase):
self.assertEqual(
get_projects(repo), {
- 'test0': ProjectMetadata('0 6,18 * * *'),
- 'test1': ProjectMetadata('0 6,14,22 * * *')
+ 'test0':
+ ProjectMetadata('0 6,18 * * *', 'builds_per_day: 2',
+ 'name: test'),
+ 'test1':
+ ProjectMetadata('0 6,14,22 * * *', 'builds_per_day: 3',
+ 'name: test')
})
def test_get_projects_no_docker_file(self):
@@ -258,8 +276,9 @@ class TestDataSync(unittest.TestCase):
Repository('test1', 'dir', 'projects/test1')
])
- self.assertEqual(get_projects(repo),
- {'test0': ProjectMetadata('0 6 * * *')})
+ self.assertEqual(
+ get_projects(repo),
+ {'test0': ProjectMetadata('0 6 * * *', 'name: test', 'name: test')})
def test_get_projects_invalid_project_name(self):
"""Testing get_projects() with invalid project name"""
@@ -275,8 +294,9 @@ class TestDataSync(unittest.TestCase):
])
])
- self.assertEqual(get_projects(repo),
- {'test0': ProjectMetadata('0 6 * * *')})
+ self.assertEqual(
+ get_projects(repo),
+ {'test0': ProjectMetadata('0 6 * * *', 'name: test', 'name: test')})
def test_get_projects_non_directory_type_project(self):
"""Testing get_projects() when a file in projects/ is not of type 'dir'."""
@@ -289,8 +309,9 @@ class TestDataSync(unittest.TestCase):
Repository('test1', 'file', 'projects/test1')
])
- self.assertEqual(get_projects(repo),
- {'test0': ProjectMetadata('0 6 * * *')})
+ self.assertEqual(
+ get_projects(repo),
+ {'test0': ProjectMetadata('0 6 * * *', 'name: test', 'name: test')})
def test_invalid_yaml_format(self):
"""Testing invalid yaml schedule parameter argument."""